Python QCryptographicHash.result Examples

Programming Language: Python

Namespace/Package Name: PyQt4.QtCore

Method/Function: result

Examples at hotexamples.com: 4

Python QCryptographicHash.result - 4 examples found. These are the top rated real world Python examples of PyQt4.QtCore.QCryptographicHash.result extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

addData(2)

result(2)

QCryptographicHash(1)

hash(1)

reset(1)

Example #1

Show file

File: __init__.py Project: jkunle/paul

def qmd5_hash(salt, data=None):
    """
    Returns the hashed output of MD5Sum on salt, data
    using PyQt4.QCryptographicHash.

    :param salt: Initial salt
    :param data: OPTIONAL Data to hash; nothing extra is hashed when ``None``
    :returns: str
    """
    log.debug('qmd5_hash(salt="%s")' % salt)
    hash_obj = QHash(QHash.Md5)
    hash_obj.addData(salt)
    # ``data`` is optional, so only feed it to the hash when the caller
    # supplied it; handing the ``None`` default to addData() is an error.
    if data is not None:
        hash_obj.addData(data)
    # Hex-encode the raw digest so the result is printable
    hash_value = hash_obj.result().toHex()
    log.debug('qmd5_hash() returning "%s"' % hash_value)
    return hash_value.data()

Example #2

Show file

File: __init__.py Project: crossroadchurch/paul

def qmd5_hash(salt, data=None):
    """
    Returns the hashed output of MD5Sum on salt, data
    using PyQt4.QCryptographicHash.

    :param salt: Initial salt
    :param data: OPTIONAL Data to hash; nothing extra is hashed when ``None``
    :returns: str
    """
    log.debug('qmd5_hash(salt="%s")' % salt)
    hash_obj = QHash(QHash.Md5)
    hash_obj.addData(salt)
    # ``data`` is optional, so only feed it to the hash when the caller
    # supplied it; handing the ``None`` default to addData() is an error.
    if data is not None:
        hash_obj.addData(data)
    # Hex-encode the raw digest so the result is printable
    hash_value = hash_obj.result().toHex()
    log.debug('qmd5_hash() returning "%s"' % hash_value)
    return hash_value.data()

Example #3

Show file

File: pqEdit.py Project: tallforasmurf/PPQT

class PPTextEditor(QPlainTextEdit):
    # Initialize the editor on creation.
    def __init__(self, parent=None, fontsize=12):
        """Configure the editor widget and create its helper objects.

        parent is handed to the base-class constructor; fontsize is the
        size passed to pqMsgs.getMonoFont for the editing font.
        """
        super(PPTextEditor, self).__init__(parent)
        # Lines are never wrapped; a horizontal scrollbar appears as needed.
        self.setLineWrapMode(QPlainTextEdit.NoWrap)
        # A jump to a line should land it in the middle of the window.
        self.setCenterOnScroll(True)
        # Monospaced font as chosen by the user with View>Font.
        mono_font = pqMsgs.getMonoFont(fontsize, True)
        self.setFont(mono_font)
        # The "syntax" highlighter starts out attached to an empty
        # QTextDocument. It is pointed at the real document only after a
        # document is loaded (it relies on metadata), and then only when/if
        # the IMC.*HiliteSwitch es are on.
        placeholder_doc = QTextDocument()
        self.nulDoc = placeholder_doc
        self.hiliter = wordHighLighter(placeholder_doc)
        # The metadata lists are initialized when self.clear() is called
        # from pqMain, shortly.
        # Regex for quickly deciding whether a selection is a single word.
        self.oneWordRE = QRegExp(u'^\W*(\w{2,})\W*$')
        self.menuWord = QString()
        # SHA-1 hash machine, reused for every document hash.
        self.cuisineart = QCryptographicHash(QCryptographicHash.Sha1)

    # switch on or off our text-highlighting. By switching the highlighter
    # to a null document we remove highlighting; by switching it back to
    # the real document, we cause re-highlighting of everything. This makes
    # significant delay for a large document, so put up a status message
    # during it by starting and ending a progress bar.
    def setHighlight(self, onoff):
        """Detach the highlighter, then re-attach it to the document if onoff is true."""
        # Highlighting is always turned off first by pointing the
        # highlighter at the null document.
        self.hiliter.setDocument(self.nulDoc)
        if not onoff:
            return
        # Re-attaching to the real document re-highlights everything, so
        # bracket it with a status message.
        pqMsgs.showStatusMsg("Setting Scanno/Spelling Highlights...")
        self.hiliter.setDocument(self.document())
        pqMsgs.clearStatusMsg()

    # Implement clear/new. Just toss everything we keep.
    def clear(self):
        """Throw away the document and reset the state kept here and in IMC."""
        self.setHighlight(False)
        doc = self.document()
        doc.clear()
        doc.setModified(False)
        # nine bookmark slots, all unset
        self.bookMarkList = [None] * 9
        # empty every IMC-held container, in the same order as always
        for holder in (IMC.pageTable, IMC.goodWordList, IMC.badWordList,
                       IMC.wordCensus, IMC.charCensus, IMC.notesEditor,
                       IMC.pngPanel):
            holder.clear()
        # reset the scalar book settings
        IMC.needSpellCheck = False
        IMC.needMetadataSave = 0x00
        IMC.staleCensus = 0x00
        IMC.bookSaveEncoding = QString(u'UTF-8')
        IMC.bookMainDict = IMC.spellCheck.mainTag
        # Insert a space to force a cursor "move" (and so a cursorMoved signal
        # that clears the status line), then undo it so the document is not
        # left modified.
        self.textCursor().insertText(QString(' '))
        self.document().undo()


    # Implement the Edit menu items:
    # Edit > ToUpper,  Edit > ToTitle,  Edit > ToLower
    # Note that in full Unicode, changing letter case is not so simple as it
    # was in Latin-1! We use the QChar and QString facilities to do it, and
    # a regex in a loop to pick off words. Restore the current selection after
    # so another operation can be done on it.
    # N.B. it is not possible to do self.textCursor().setPosition(), it seems
    # that self.textCursor() is "const". One has to create a new cursor,
    # position it, and install it on the document with self.setTextCursor().
    def toUpperCase(self):
        """Upper-case each word of the current selection and reselect the same span."""
        global reWord
        cursor = QTextCursor(self.textCursor())
        if not cursor.hasSelection():
            return  # nothing selected, nothing to do
        sel_from = cursor.selectionStart()
        sel_to = cursor.selectionEnd()
        text = QString(cursor.selectedText())  # private copy of the selection
        at = reWord.indexIn(text, 0)  # offset of the first word, if any
        if at < 0:
            return  # selection holds no words
        while True:
            word = reWord.cap(0)  # the matched word, a QString
            span = word.size()
            text.replace(at, span, word.toUpper())  # swap in the upper-case form
            at = reWord.indexIn(text, at + span)  # look for the next word
            if at < 0:
                break
        # At least one word changed: put the altered text over the selection.
        cursor.insertText(text)
        # Inserting dropped the selection; rebuild it left to right.
        cursor.setPosition(sel_from, QTextCursor.MoveAnchor)  # click
        cursor.setPosition(sel_to, QTextCursor.KeepAnchor)    # drag
        self.setTextCursor(cursor)

    # to-lower is identical except for the method call.
    def toLowerCase(self):
        """Lower-case each word of the current selection and reselect the same span."""
        global reWord # the regex \b\w+\b
        cursor = QTextCursor(self.textCursor())
        if not cursor.hasSelection():
            return  # nothing selected, nothing to do
        sel_from = cursor.selectionStart()
        sel_to = cursor.selectionEnd()
        text = QString(cursor.selectedText())  # private copy of the selection
        at = reWord.indexIn(text, 0)  # offset of the first word, if any
        if at < 0:
            return  # selection holds no words
        while True:
            word = reWord.cap(0)  # the matched word, a QString
            span = word.size()
            text.replace(at, span, word.toLower())  # swap in the lower-case form
            at = reWord.indexIn(text, at + span)  # look for the next word
            if at < 0:
                break
        # At least one word changed: put the altered text over the selection.
        cursor.insertText(text)
        # Inserting dropped the selection; rebuild it left to right.
        cursor.setPosition(sel_from, QTextCursor.MoveAnchor)  # click
        cursor.setPosition(sel_to, QTextCursor.KeepAnchor)    # drag
        self.setTextCursor(cursor)

    # toTitle is similar but we have to change the word to lowercase (in case
    # it is uppercase now) and then change the initial character to upper.
    # Note it would be possible to write a smarter version that looked up the
    # word in a list of common adjectives, connectives, and adverbs and avoided
    # capitalizing a, and, of, by and so forth. Not gonna happen.
    def toTitleCase(self):
        """Title-case the selection: lower-case it, then capitalize each word's initial."""
        global reWord # the regex \b\w+\b
        # First pass: force everything to lower case (words may be all caps).
        self.toLowerCase()
        cursor = QTextCursor(self.textCursor())
        if not cursor.hasSelection():
            return  # nothing selected, nothing to do
        sel_from = cursor.selectionStart()
        sel_to = cursor.selectionEnd()
        text = QString(cursor.selectedText())  # private copy of the selection
        at = reWord.indexIn(text, 0)  # offset of the first word, if any
        if at < 0:
            return  # selection holds no words
        while True:
            span = reWord.cap(0).size()  # length of the matched word
            text.replace(at, 1, text.at(at).toUpper())  # upper-case the initial only
            at = reWord.indexIn(text, at + span)  # look for the next word
            if at < 0:
                break
        # At least one word changed: put the altered text over the selection.
        cursor.insertText(text)
        # Inserting dropped the selection; rebuild it left to right.
        cursor.setPosition(sel_from, QTextCursor.MoveAnchor)  # click
        cursor.setPosition(sel_to, QTextCursor.KeepAnchor)    # drag
        self.setTextCursor(cursor)

    # Re-implement the parent's keyPressEvent in order to provide some
    # special controls. (Note on Mac, "ctrl-" is "cmd-" and "alt-" is "opt-")
    # ctrl-plus increases the edit font size 1 pt
    # (n.b. ctrl-plus probably only comes from a keypad, we usually just get
    #  ctrl-shift-equals instead of plus)
    # ctrl-minus decreases the edit font size 1 pt
    # ctrl-<n> for n in 1..9 jumps the insertion point to bookmark <n>
    # ctrl-shift-<n> extends the selection to bookmark <n>
    # ctrl-alt-<n> sets bookmark n at the current position
    def keyPressEvent(self, event):
        """Handle the editor's special key chords; hand all other keys to the parent.

        Chords in IMC.findKeys are re-emitted as the "editKeyPress" signal,
        IMC.zoomKeys change the font size by one point, and the three
        bookmark key groups jump to, select to, or set a bookmark.
        """
        # Fold the (filtered) modifier bits and the key code into one value.
        chord = int( int(event.modifiers()) & IMC.keypadDeModifier) | int(event.key())
        # Keep overhead low: anything that is not ours goes straight up.
        if chord not in IMC.keysOfInterest:
            event.ignore()
            super(PPTextEditor, self).keyPressEvent(event)
            return
        event.accept()  # this one is handled here
        if chord in IMC.findKeys:
            # ^f, ^g, etc. are passed straight on to the Find panel
            self.emit(SIGNAL("editKeyPress"), chord)
        elif chord in IMC.zoomKeys:
            # font() and setFont() are inherited from QWidget
            step = -1 if chord == IMC.ctl_minus else 1
            # point size actually in use, plus or minus one
            new_size = self.fontInfo().pointSize() + step
            if 3 < new_size < 65:  # stay within sane sizes
                # Changing self.font() in place has no effect; the altered
                # font has to be installed again with setFont().
                font = self.font()
                font.setPointSize(new_size)
                self.setFont(font)
                IMC.fontSize = new_size  # remembered for shutdown time
        elif chord in IMC.markKeys:
            # ^1-9: jump to the bookmark if that slot is set
            saved = self.bookMarkList[chord - IMC.ctl_1]
            if saved is not None:
                self.setTextCursor(saved)
        elif chord in IMC.markShiftKeys:
            # shift-ctl-1/9: extend the selection from the current ANCHOR
            # to the POSITION of the bookmark cursor
            target = self.bookMarkList[chord - IMC.ctl_shft_1]
            if target is not None:
                cursor = QTextCursor(self.textCursor())
                cursor.setPosition(target.position(), QTextCursor.KeepAnchor)
                self.setTextCursor(cursor)
        elif chord in IMC.markSetKeys:
            # ctl-alt-1-9: store a copy of the current cursor in slot 0-8
            self.bookMarkList[chord - IMC.ctl_alt_1] = QTextCursor(self.textCursor())
            IMC.needMetadataSave |= IMC.bookmarksChanged

    # Called from pqFind after doing a successful search, this method centers the
    # current selection (which is the result of the find) in the window. If the selection
    # is large, put the top of the selection higher than center but on no account
    # above the top of the viewport. Two problems arise: One, the rectangles returned
    # by .cursorRect() and by .viewport().geometry() are in pixel units, while the
    # vertical scrollbar is sized in logical text lines. So we work out the adjustment
    # as a fraction of the viewport, times the scrollbar's pageStep value to get lines.
    # Two, cursorRect gives only the height of the actual cursor, not of the selected
    # text. To find out the height of the full selection we have to get a cursorRect
    # for the start of the selection, and another for the end of it.
    def centerCursor(self) :
        """Scroll so that the current selection is centered in the viewport.

        A selection shorter than half the viewport gets its top line put at
        the vertical center; a taller one that still fits is centered as a
        whole; one that does not fit is placed with its top one line below
        the top of the viewport. Nothing is scrolled when the viewport has
        no height or the computed scrollbar value is out of range.
        """
        tc = QTextCursor(self.textCursor()) # copy the working cursor with its selection
        top_point = tc.position() # one end of selection, in character units
        bot_point = tc.anchor() # ..and the other end
        if top_point > bot_point : # often the position is > the anchor
            (top_point, bot_point) = (bot_point, top_point)
        tc.setPosition(top_point) # cursor for the top of the selection
        top_rect = self.cursorRect(tc)
        selection_top = top_rect.top() # ..get its top pixel
        line_height = top_rect.height() # and save height of one line
        tc.setPosition(bot_point) # cursor for the end of the selection
        selection_bot = self.cursorRect(tc).bottom() # ..selection's bottom pixel
        selection_height = selection_bot - selection_top + 1 # selection height in pixels
        view_height = self.viewport().geometry().height() # scrolled area's height in px
        if view_height <= 0 :
            return # no visible area: nothing to center in, and avoids dividing by zero
        view_half = view_height >> 1 # int(view_height/2)
        if selection_height < view_half :
            # selected text is less than half the window height: center the top of the
            # selection, i.e., make the cursor_top equal to view_half.
            pixel_adjustment = selection_top - view_half # may be negative
        elif selection_height < (view_height - line_height) :
            # all selected text fits in the viewport (with a little free): center it.
            pixel_adjustment = (selection_top + (selection_height/2)) - view_half
        else :
            # not all selected text fits the window, put text top near window top
            pixel_adjustment = selection_top - line_height
        # OK, convert the pixel adjustment to a line-adjustment based on the assumption
        # that a scrollbar pageStep is the height of the viewport in lines.
        # Force a floating-point quotient: with two ints, Python 2's "/" would
        # truncate the fraction to a whole number before it is scaled.
        adjust_fraction = float(pixel_adjustment) / view_height
        vscroller = self.verticalScrollBar()
        page_step = vscroller.pageStep() # lines in a viewport page, actually less 1
        adjust_lines = int(page_step * adjust_fraction)
        target = vscroller.value() + adjust_lines
        if 0 <= target <= vscroller.maximum() :
            vscroller.setValue(target)



    # Catch the contextMenu event and extend the standard context menu with
    # a separator and the option to add a word to good-words, but only when
    # there is a selection and it encompasses just one word.
    def contextMenuEvent(self,event) :
        """Show the standard context menu, extended when one word is selected.

        When the selection matches self.oneWordRE, a separator and a
        "<word> -> Goodwords" action are appended; the action is connected
        to self.addToGW and the word is kept in self.menuWord.
        """
        ctx_menu = self.createStandardContextMenu()
        # hasSelection must be *called*: the bare bound method is always true,
        # which made this test a no-op.
        if self.textCursor().hasSelection() :
            qs = self.textCursor().selectedText()
            if 0 == self.oneWordRE.indexIn(qs) : # it matches at 0 or not at all
                self.menuWord = self.oneWordRE.cap(1) # save the word
                ctx_menu.addSeparator()
                gw_name = QString(self.menuWord) # make a copy, append() alters it
                gw_action = ctx_menu.addAction(gw_name.append(QString(u' -> Goodwords')))
                self.connect(gw_action, SIGNAL("triggered()"), self.addToGW)
        ctx_menu.exec_(event.globalPos())

    # This slot receives the "someword -> good_words" context menu action
    def addToGW(self):
        """Slot for the context-menu action: add self.menuWord to the good-words list."""
        IMC.goodWordList.insert(self.menuWord)
        # The word lists changed, so spelling has to be rechecked and the
        # metadata is flagged for saving.
        IMC.needSpellCheck = True
        IMC.needMetadataSave |= IMC.goodwordsChanged
        IMC.mainWindow.setWinModStatus()

    # Implement save: the main window opens the files for output using
    # QIODevice::WriteOnly, which wipes the contents (contrary to the doc)
    # so we need to write the document and metadata regardless of whether
    # they've been modified. However we avoid rebuilding metadata if we can.
    def save(self, dataStream, metaStream):
        """Write the document text to dataStream and the metadata to metaStream.

        Both are always written. The SHA-1 hash of the text is stored in
        IMC.metaHash and IMC.documentHash before writing.
        """
        # The document contents, as a QString
        text = self.toPlainText()
        # Hash the text with SHA-1 and record the repr of the digest in both
        # hash fields of the IMC.
        hasher = self.cuisineart
        hasher.reset()
        hasher.addData(text)
        digest = repr(bytes(hasher.result()))
        IMC.metaHash = digest
        IMC.documentHash = digest
        # Writing the document itself is a one-liner with a stream
        dataStream << text
        dataStream.flush()
        # Metadata is written as it stands; it is not rebuilt here.
        self.writeMetadata(metaStream)
        metaStream.flush()
        IMC.needMetadataSave = 0x00
        # clearing the modified flag triggers main.setWinModStatus()
        self.document().setModified(False)

    def writeMetadata(self,metaStream):
        """Write all metadata sections to metaStream and flush it.

        One-line settings are written as {{NAME value}}. Multi-line sections
        are written between {{NAME}} and {{/NAME}} marker lines. Note lines
        that begin with "{{" are prefixed with U+FFFD so they cannot be
        mistaken for a section marker when the file is read back.
        """
        metaStream << u"{{VERSION 0}}\n" # meaningless at the moment
        metaStream << u"{{ENCODING "
        metaStream << unicode(IMC.bookSaveEncoding)
        metaStream << u"}}\n"
        metaStream << u"{{STALECENSUS "
        metaStream << (u"FALSE" if 0 == IMC.staleCensus else u"TRUE")
        metaStream << u"}}\n"
        metaStream << u"{{NEEDSPELLCHECK "
        metaStream << (u"FALSE" if 0 == IMC.needSpellCheck else u"TRUE")
        metaStream << u"}}\n"
        metaStream << u"{{MAINDICT "
        metaStream << unicode(IMC.bookMainDict)
        metaStream << u"}}\n"
        # The hash could contain any character. Using __repr__ ensured
        # it is enclosed in balanced single or double quotes but to be
        # double sure we will fence it in characters we can spot with a regex.
        metaStream << u"{{DOCHASH " + IMC.documentHash + u" }}\n"
        # pageTable goes out between {{PAGETABLE}}..{{/PAGETABLE}}
        if IMC.pageTable.size() :
            metaStream << u"{{PAGETABLE}}\n"
            for i in range(IMC.pageTable.size()) :
                metaStream << IMC.pageTable.metaStringOut(i)
            metaStream << u"{{/PAGETABLE}}\n"
        # The census lines use a unicode template: formatting a unicode key
        # into a byte-string template makes Python 2 encode the key as ASCII,
        # which fails for any non-ASCII character or word.
        if IMC.charCensus.size() :
            metaStream << u"{{CHARCENSUS}}\n"
            for i in range(IMC.charCensus.size()):
                (w,n,f) = IMC.charCensus.get(i)
                metaStream << u"{0} {1} {2}\n".format(unicode(w), n, f)
            metaStream << u"{{/CHARCENSUS}}\n"
        if IMC.wordCensus.size() :
            metaStream << u"{{WORDCENSUS}}\n"
            for i in range(IMC.wordCensus.size()):
                (w,n,f) = IMC.wordCensus.get(i)
                metaStream << u"{0} {1} {2}\n".format(unicode(w), n, f)
            metaStream << u"{{/WORDCENSUS}}\n"
        # Each set bookmark is written as: slot position anchor
        metaStream << u"{{BOOKMARKS}}\n"
        for i in range(9): # 0..8
            mark = self.bookMarkList[i]
            if mark is not None :
                metaStream << u"{0} {1} {2}\n".format(i, mark.position(), mark.anchor())
        metaStream << u"{{/BOOKMARKS}}\n"
        metaStream << u"{{NOTES}}\n"
        d = IMC.notesEditor.document()
        if not d.isEmpty():
            for i in range( d.blockCount() ):
                t = d.findBlockByNumber(i).text()
                if t.startsWith("{{"):
                    # u"\ufffd" is the single Unicode Replacement char; the
                    # former u"\xfffd" was the three characters \xff, f, d.
                    t.prepend(u"\ufffd")
                metaStream << t + "\n"
            IMC.notesEditor.document().setModified(False)
        metaStream << u"{{/NOTES}}\n"
        if IMC.goodWordList.active() : # have some good words
            metaStream << u"{{GOODWORDS}}\n"
            IMC.goodWordList.save(metaStream)
            metaStream << u"{{/GOODWORDS}}\n"
        if IMC.badWordList.active() : # have some bad words
            metaStream << u"{{BADWORDS}}\n"
            IMC.badWordList.save(metaStream)
            metaStream << u"{{/BADWORDS}}\n"
        # Record the current selection so it can be restored on load
        p1 = self.textCursor().selectionStart()
        p2 = self.textCursor().selectionEnd()
        metaStream << u"{{CURSOR "+unicode(p1)+u' '+unicode(p2)+u"}}\n"
        metaStream.flush()

    # Implement load: the main window has the job of finding and opening files
    # then passes QTextStreams ready to read here. If metaStream is None,
    # no metadata file was found and we construct the metadata.
    # n.b. before main calls here, it calls our .clear, hence lists are
    # empty, hiliting is off, etc.

    def load(self, dataStream, metaStream, goodStream, badStream):
        """Load a document and either read or construct its metadata.

        dataStream supplies the document text. When metaStream is None the
        good-word and bad-word streams (each may be None) are loaded and the
        metadata is rebuilt from scratch; otherwise the metadata is read
        from metaStream. The user is warned when the hash found in the
        metadata differs from the hash of the loaded text.
        """
        # Put the document file into the editor
        self.setPlainText(dataStream.readAll())
        # Hash the loaded text. Both hash fields start out equal and stay so
        # unless the metadata file supplies a different value.
        hasher = self.cuisineart
        hasher.reset()
        hasher.addData(self.toPlainText())
        digest = repr(bytes(hasher.result()))
        IMC.metaHash = digest
        IMC.documentHash = digest
        if metaStream is not None:
            self.loadMetadata(metaStream)
        else:
            # no metadata: load goodwords, badwords, and take the census
            for stream, word_list in ((goodStream, IMC.goodWordList),
                                      (badStream, IMC.badWordList)):
                if stream is not None:
                    word_list.load(stream)
            self.rebuildMetadata(page=True) # page table & vocab from scratch
        # Differing hashes mean the metadata carried a DOCHASH for some other
        # book or version. Tell the user.
        if IMC.metaHash != IMC.documentHash :
            pqMsgs.warningMsg(u"The document file and metadata file do not match!",
                              u"Bookmarks, page breaks and other metadata will be wrong! Strongly recommend you not edit or save this book.")
        # Turn hiliting back on if the user wanted it. This can be slow for a
        # large book, but leaving it off would put us out of step with the
        # View menu toggles.
        wants_hilite = IMC.scannoHiliteSwitch or IMC.spellingHiliteSwitch
        self.setHighlight(wants_hilite)
        # Use the main dictionary named in the metadata, if there was one
        if IMC.bookMainDict is not None:
            IMC.spellCheck.setMainDict(IMC.bookMainDict)

    # load page table & vocab from the .meta file as a stream.
    # n.b. QString has a split method we could use but instead
    # we take the input line to a Python u-string and split it. For
    # the word/char census we have to take the key back to a QString.
    def loadMetadata(self,metaStream):
        """Read the metadata sections from metaStream into IMC and this editor.

        Each non-blank line outside a section must be a {{SECTION ...}}
        marker. One-line sections carry their value inside the marker;
        multi-line sections run up to the matching {{/SECTION}} line. An
        unrecognized line produces an info message and ends the load.
        """
        sectionRE = QRegExp( u"\{\{(" + '|'.join (
            ['PAGETABLE','CHARCENSUS','WORDCENSUS','BOOKMARKS',
             'NOTES','GOODWORDS','BADWORDS','CURSOR','VERSION',
             'STALECENSUS','NEEDSPELLCHECK','ENCODING', 'DOCHASH', 'MAINDICT'] ) \
                             + u")(.*)\}\}",
            Qt.CaseSensitive)
        metaVersion = 0 # base version
        while not metaStream.atEnd() :
            qline = metaStream.readLine().trimmed()
            if qline.isEmpty() : continue # allow blank lines between sections
            if sectionRE.exactMatch(qline) : # section start
                section = sectionRE.cap(1)
                argument = unicode(sectionRE.cap(2).trimmed())
                endsec = QString(u"{{/" + section + u"}}")
                if section == u"VERSION":
                    if len(argument) != 0 :
                        metaVersion = int(argument)
                    continue # no more data after {{VERSION x }}
                elif section == u"STALECENSUS" :
                    if argument == u"TRUE" :
                        IMC.staleCensus = IMC.staleCensusLoaded
                    continue # no more data after {{STALECENSUS x}}
                elif section == u"NEEDSPELLCHECK" :
                    if argument == u"TRUE" :
                        IMC.needSpellCheck = True
                    continue # no more data after {{NEEDSPELLCHECK x}}
                elif section == u"ENCODING" :
                    IMC.bookSaveEncoding = QString(argument)
                    continue
                elif section == u"MAINDICT" :
                    IMC.bookMainDict = QString(argument)
                    continue
                elif section == u"DOCHASH" :
                    IMC.metaHash = argument
                    continue
                elif section == u"PAGETABLE":
                    qline = metaStream.readLine()
                    while (not qline.startsWith(endsec)) and (not qline.isEmpty()):
                        IMC.pageTable.metaStringIn(qline)
                        qline = metaStream.readLine()
                    continue
                elif section == u"CHARCENSUS":
                    qline = metaStream.readLine()
                    while (not qline.startsWith(endsec)) and (not qline.isEmpty()):
                        # can't just .split the char census, the first
                        # char is the char being counted and it can be a space.
                        text = unicode(qline)
                        parts = text[2:].split(' ')
                        IMC.charCensus.append(QString(text[0]),int(parts[0]),int(parts[1]))
                        qline = metaStream.readLine()
                    continue
                elif section == u"WORDCENSUS":
                    qline = metaStream.readLine()
                    while (not qline.startsWith(endsec)) and (not qline.isEmpty()):
                        parts = unicode(qline).split(' ')
                        IMC.wordCensus.append(QString(parts[0]),int(parts[1]),int(parts[2]))
                        qline = metaStream.readLine()
                    continue
                elif section == u"BOOKMARKS":
                    # Each line is: slot position [anchor]
                    qline = metaStream.readLine()
                    while (not qline.startsWith(endsec)) and (not qline.isEmpty()):
                        parts = unicode(qline).split(' ')
                        tc = QTextCursor(self.document() )
                        if len(parts) == 3 : # early versions didn't save anchor
                            # Rebuild the selection: put the cursor at the
                            # saved anchor, then drag to the saved position.
                            # (movePosition() takes a move operation, not a
                            # character offset, so it cannot be used here.)
                            tc.setPosition(int(parts[2]),QTextCursor.MoveAnchor)
                            tc.setPosition(int(parts[1]),QTextCursor.KeepAnchor)
                        else :
                            tc.setPosition(int(parts[1]))
                        self.bookMarkList[int(parts[0])] = tc
                        qline = metaStream.readLine()
                    continue
                elif section == u"NOTES":
                    e = IMC.notesEditor
                    e.setUndoRedoEnabled(False)
                    qline = metaStream.readLine()
                    while (not qline.startsWith(endsec)) and not metaStream.atEnd():
                        if qline.startsWith(u"\ufffd"): # escaped {{
                            qline.remove(0,1)
                        elif qline.startsWith(u"\xff" u"fd{{"):
                            # Files written with the old, mistaken escape carry
                            # the three characters \xff, f, d instead of
                            # U+FFFD; strip all three.
                            qline.remove(0,3)
                        e.appendPlainText(qline)
                        qline = metaStream.readLine()
                    e.setUndoRedoEnabled(True)
                    continue
                elif section == u"GOODWORDS" :
                    # not going to bother checking for endsec return,
                    # if it isn't that then we will shortly fail anyway
                    IMC.goodWordList.load(metaStream,endsec)
                    continue
                elif section == u"BADWORDS" :
                    IMC.badWordList.load(metaStream,endsec)
                    continue
                elif section == u"CURSOR" : # restore selection as of save
                    p1p2 = argument.split(' ')
                    tc = QTextCursor(self.document())
                    tc.setPosition(int(p1p2[0]),QTextCursor.MoveAnchor)
                    tc.setPosition(int(p1p2[1]),QTextCursor.KeepAnchor)
                    self.setTextCursor(tc)
                else:
                    # this can't happen; section is text captured by the RE
                    # and we have accounted for all possibilities
                    raise AssertionError("impossible metadata")
            else: # Non-blank line that doesn't match sectionRE?
                pqMsgs.infoMsg(
                    "Unexpected line in metadata: {0}".format(pqMsgs.trunc(qline,20)),
                        "Metadata may be incomplete, suggest quit")
                break

    # Rebuild as much of the char/word census and spellcheck as we need to.
    # This is called from load, above, and from the Char and Word panels
    # Refresh buttons. If page=True we are loading a doc for which there is
    # no metadata file, so cache page definitions; otherwise just skip the
    # page definitions (see doCensus). If the doc has changed we need to
    # rerun the full char/word census. But if not, we might still need a
    # spellcheck, if the dictionary has changed.
    def rebuildMetadata(self,page=False):
        """Redo the census and/or the spellcheck, whichever is needed.

        The census is run when page is true or IMC.staleCensus is nonzero;
        the spellcheck is run when IMC.needSpellCheck is set.
        """
        census_needed = page or (IMC.staleCensus != 0)
        if census_needed:
            self.doCensus(page)
        # tested after the census has had its chance to run
        if IMC.needSpellCheck:
            self.doSpellcheck()

    # Go through vocabulary census and check the spelling (it would be a big
    # waste of time to check every word as it was read). If the spellcheck
    # is not up (i.e. it couldn't find a dictionary) we only mark as bad the
    # words in the badwords list.
    def doSpellcheck(self):
        """Set or clear the misspelt flag on every word in IMC.wordCensus.

        Good-list words are never flagged and bad-list words always are;
        any other word is flagged only when the spellchecker is up and
        rejects it.
        """
        speller_up = IMC.spellCheck.isUp()
        nwords = IMC.wordCensus.size()
        if nwords <= 0:  # could be zero in a null document
            return
        pqMsgs.startBar(nwords, "Checking spelling...")
        keep_mask = 0xff - IMC.WordMisspelt  # every flag bit but the misspelt one
        for index in range(nwords):
            (qword, cnt, flags) = IMC.wordCensus.get(index)
            flags = flags & keep_mask  # start with the flag off
            # a word may carry a /dict-tag; split it off as a string or ""
            (token, slash, dict_tag) = unicode(qword).partition("/")
            if not IMC.goodWordList.check(token):
                if IMC.badWordList.check(token):
                    flags |= IMC.WordMisspelt
                elif speller_up and not IMC.spellCheck.check(token, dict_tag):
                    # failed the check in its optional dictionary
                    flags |= IMC.WordMisspelt
            IMC.wordCensus.setflags(index, flags)
            if (index & 0x1f) == 0:  # every 32nd word
                pqMsgs.rollBar(index)
        pqMsgs.endBar()
        IMC.needMetadataSave |= IMC.wordlistsChanged
        IMC.needSpellCheck = False
        if IMC.spellingHiliteSwitch:
            self.setHighlight(True)  # force refresh of spell underlines

    # Scan the successive lines of the document and build the census of chars,
    # words, and (first time only) the table of page separators.
    #
    # If this is an HTML file (from IMC.bookType), and if its first line is
    # <!DOCTYPE..., we skip until we see <body>. This avoids polluting our
    # char and word censuses with CSS comments and so on. Regular HTML tags
    # like <table> and <b> are skipped over automatically during parsing.
    #
    # Qt obligingly supplies each line as a QTextBlock. We examine the line
    # to see if it is a page separator. If we are opening a file having no
    # metadata, the Page argument is True and we build a page table entry.
    # Other times (e.g. from the Refresh button of the Word or Char panel),
    # we skip over page separator lines.

    # Each non-separator line is first scanned by characters and then for words.
    # The character scan counts characters for the Chars panel. We do NOT parse
    # the text for PGDP productions [oe] and [OE] nor other markups for accented
    # characters such as [=o] for o-with-macron or [^a] for a-with-circumflex.
    # These are just counted as [, o, e, ]. Reasons: (1) the alternative, to parse
    # them into their proper unicode values and count those, entails a whole lotta
    # code that would slow this census badly; (2) having the unicode chars in
    # the Chars panel would be confusing when they are not actually in the text;
    # (3) there is some value in having the counts of [ and ]. For similar reasons
    # we count all the chars in HTML e.g. "<i>" is three characters even though it
    # is effectively unprinted metadata.

    # In scanning words, we collect numbers as words. We collect internal hyphens
    # as letters ("mother-in-law") but not at end of word ("help----" or emdash).
    # We collect internal apostrophes ("it's", "hadn't") but not apostrophes at ends,
    # "'Twas" is counted as "Twas", "students' work" as "students work". This is because
    # there seems to be no way to distinguish the contractive prefix ('Twas)
    # and the final possessive (students') from normal single-quote marks!
    # And we collect leading and internal, but not trailing, square brackets as
    # letters. Thus [OE]dipus and ma[~n]ana are words (but will fail spellcheck)
    # while Einstein[A] (a footnote key) is not.

    # We also collect HTML productions ("</i>" and "<table>") as words. They do not
    # go in the census but we check them for lang= attributes and set the alternate
    # spellcheck dictionary from them.

    def doCensus(self, page=False) :
        """Rebuild the character and word censuses from the document text.

        Clears IMC.wordCensus and IMC.charCensus, then visits every text
        block of the document. A line that matches reLineSep (a page
        separator) is stored in IMC.pageTable when page is True and is
        otherwise ignored. Every other line has each character counted and
        each reTokens match either examined as an HTML tag (to track a
        lang= alternate dictionary) or counted as a word with property flags.

        On completion sets IMC.needSpellCheck, zeroes IMC.staleCensus and
        ORs IMC.wordlistsChanged into IMC.needMetadataSave.

        :param page: True to record page-separator lines in IMC.pageTable,
            False to skip over them.
        """
        global reLineSep, reTokens, reLang, qcLess
        # Clear the current census values
        IMC.wordCensus.clear()
        IMC.charCensus.clear()
        # Count chars locally for speed
        local_char_census = defaultdict(int)
        # Name of current alternate dictionary
        alt_dict = QString() # isEmpty when none
        # Tag from which we set an alternate dict
        alt_dict_tag = QString()
        # Start the progress bar based on the number of lines in the document
        pqMsgs.startBar(self.document().blockCount(),"Counting words and chars...")
        # Find the first text block of interest, skipping an HTML header file
        qtb = self.document().begin() # first text block
        if IMC.bookType.startsWith(QString(u"htm")) \
        and qtb.text().startsWith(QString(u"<!DOCTYPE")) :
            while (qtb != self.document().end()) \
            and (not qtb.text().startsWith(QString(u"<body"))) :
                qtb = qtb.next()
        # Scan all lines of the document to the end.
        while qtb != self.document().end() :
            qsLine = qtb.text() # text of line as qstring
            if reLineSep.exactMatch(qsLine): # this is a page separator line
                if page :
                    # We are doing page seps, it's for Open with no .meta seen,
                    # the page table has been cleared. Store the page sep
                    # data in the page table, with a textCursor to its start.
                    qsfilenum = reLineSep.cap(1) # xxx from "File: xxx.png"
                    qsproofers = reLineSep.cap(2) # \who\x\blah\etc
                    # proofer names can contain spaces, replace with en-space char
                    qsproofers.replace(QChar(" "),QChar(0x2002))
                    # create a new TextCursor instance
                    tcursor = QTextCursor(self.document())
                    # point it to this text block
                    tcursor.setPosition(qtb.position())
                    # dump all that in the page table
                    IMC.pageTable.loadPsep(tcursor, qsfilenum, qsproofers)
                # else not doing pages, just ignore this psep line
            else: # not psep, ordinary text line, count chars and words
                pyLine = unicode(qsLine) # move into Python space to count
                for c in pyLine :
                    local_char_census[c] += 1
                j = 0
                while True:
                    j = reTokens.indexIn(qsLine,j)
                    if j < 0 : # no more word-like units
                        break
                    qsWord = reTokens.cap(0)
                    j += qsWord.size()
                    if qsWord.startsWith(qcLess) :
                        # Examine a captured HTML production.
                        if not reTokens.cap(2).isEmpty() :
                            # HTML open tag, look for lang='dict'
                            if 0 <= reLang.indexIn(reTokens.cap(3)) :
                                # found it: save tag and dict name
                                alt_dict_tag = QString(reTokens.cap(2))
                                alt_dict = QString(reLang.cap(1))
                                alt_dict.prepend(u'/') # make "/en_GB"
                            # else no lang= attribute
                        else:
                            # HTML close tag, see if it closes alt dict use
                            if reTokens.cap(5) == alt_dict_tag :
                                # yes, matches open-tag for dict, clear it
                                alt_dict_tag = QString()
                                alt_dict = QString()
                            # else no alt dict in use, or didn't match
                    else : # did not start with "<", process as a word
                        # Set the property flags, which is harder now we don't
                        # look at every character. Use the QString facilities
                        # rather than python because python .isalnum fails
                        # for a hyphenated number "1850-1910".
                        flag = 0
                        if 0 != qsWord.compare(qsWord.toLower()) :
                            flag |= IMC.WordHasUpper
                        if 0 != qsWord.compare(qsWord.toUpper()) :
                            flag |= IMC.WordHasLower
                        if qsWord.contains(qcHyphen) :
                            flag |= IMC.WordHasHyphen
                        if qsWord.contains(qcApostrophe) or qsWord.contains(qcCurlyApostrophe) :
                            flag |= IMC.WordHasApostrophe
                        if qsWord.contains(reDigit) :
                            flag |= IMC.WordHasDigit
                        # The census key is the word with any "/dict" tag appended.
                        IMC.wordCensus.count(qsWord.append(alt_dict),flag)
                # end "while any more words in this line"
            # end of not-a-psep-line processing
            qtb = qtb.next() # move on to next block
            if (0 == (qtb.blockNumber() & 255)) : #every 256th block
                pqMsgs.rollBar(qtb.blockNumber()) # roll the bar
                QApplication.processEvents()
        # end of scanning all text blocks in the doc
        pqMsgs.endBar()
        # We accumulated the char counts in local_char_census. Now read it out
        # in sorted order and stick it in the IMC.charCensus list.
        for one_char in sorted(local_char_census.keys()):
            qc = QChar(ord(one_char)) # get to QChar for category() method
            IMC.charCensus.append(QString(qc),local_char_census[one_char],qc.category())
        IMC.needSpellCheck = True # after a census this is true
        IMC.staleCensus = 0 # but this is no longer true
        IMC.needMetadataSave |= IMC.wordlistsChanged

Example #4

Show file

File: pqEdit.py Project: jlg234bob/PPQT

class PPTextEditor(QPlainTextEdit):
    # Initialize the editor on creation.
    def __init__(self, parent=None, fontsize=12):
        """Configure a new editor: wrapping, font, highlighter and hasher.

        :param parent: forwarded to the parent-class constructor.
        :param fontsize: size passed to pqMsgs.getMonoFont() for the edit font.
        """
        super(PPTextEditor, self).__init__(parent)
        # No line wrapping; a horizontal scrollbar appears when required.
        self.setLineWrapMode(QPlainTextEdit.NoWrap)
        # When we jump to a line it should land in the window center.
        self.setCenterOnScroll(True)
        # Monospaced font as selected by the user with View>Font.
        mono_font = pqMsgs.getMonoFont(fontsize, True)
        self.setFont(mono_font)
        # The "syntax" highlighter starts out attached to an empty document.
        # It is pointed at the real document only after a book is loaded
        # (it relies on metadata) and only if an IMC.*HiliteSwitch is on.
        empty_doc = QTextDocument()
        self.nulDoc = empty_doc
        self.hiliter = wordHighLighter(empty_doc)
        # The metadata lists get initialized when self.clear() is called
        # from pqMain, shortly.
        # Regex for quickly deciding whether a selection is a single word.
        self.oneWordRE = QRegExp(u'^\W*(\w{2,})\W*$')
        self.menuWord = QString()
        # SHA-1 hash machine, reused by save() and load().
        self.cuisineart = QCryptographicHash(QCryptographicHash.Sha1)

    # switch on or off our text-highlighting. By switching the highlighter
    # to a null document we remove highlighting; by switching it back to
    # the real document, we cause re-highlighting of everything. This makes
    # significant delay for a large document, so put up a status message
    # during it by starting and ending a progress bar.
    def setHighlight(self, onoff):
        """Turn highlighting off, then back on again when onoff is true.

        :param onoff: truthy to attach the highlighter to the real document.
        """
        highlighter = self.hiliter
        # Always detach first by pointing at the null document.
        highlighter.setDocument(self.nulDoc)
        if not onoff:
            return
        # Re-attaching re-highlights everything, so show a status message
        # for the duration.
        pqMsgs.showStatusMsg("Setting Scanno/Spelling Highlights...")
        highlighter.setDocument(self.document())
        pqMsgs.clearStatusMsg()

    # Implement clear/new. Just toss everything we keep.
    def clear(self):
        """Discard the document and reset all per-book state held in IMC."""
        self.setHighlight(False)
        doc = self.document()
        doc.clear()
        doc.setModified(False)
        # Nine empty bookmark slots.
        self.bookMarkList = [None] * 9
        # Empty every metadata container, in the same order as before.
        for container in (IMC.pageTable, IMC.goodWordList, IMC.badWordList,
                          IMC.wordCensus, IMC.charCensus, IMC.notesEditor,
                          IMC.pngPanel):
            container.clear()
        # Reset the status flags and per-book settings.
        IMC.needSpellCheck = False
        IMC.needMetadataSave = 0x00
        IMC.staleCensus = 0x00
        IMC.bookSaveEncoding = QString(u'UTF-8')
        IMC.bookMainDict = IMC.spellCheck.mainTag
        # Force a cursor "move" so that a cursorMoved signal clears the status
        # line, then undo it so the document isn't left modified.
        self.textCursor().insertText(QString(' '))
        doc.undo()

    # Implement the Edit menu items:
    # Edit > ToUpper,  Edit > ToTitle,  Edit > ToLower
    # Note that in full Unicode, changing letter case is not so simple as it
    # was in Latin-1! We use the QChar and QString facilities to do it, and
    # a regex in a loop to pick off words. Restore the current selection after
    # so another operation can be done on it.
    # N.B. it is not possible to do self.textCursor().setPosition(), it seems
    # that self.textCursor() is "const". One has to create a new cursor,
    # position it, and install it on the document with self.setTextCursor().
    def toUpperCase(self):
        """Convert every word in the current selection to upper case.

        Words are found with the module-level regex reWord. Does nothing when
        there is no selection or the selection contains no word. Afterwards
        the selection is restored over the converted text so another
        operation can be applied to it.
        """
        global reWord
        tc = QTextCursor(self.textCursor())
        if not tc.hasSelection():
            return  # no selection, nothing to do
        startpos = tc.selectionStart()
        qs = QString(tc.selectedText())  # copy of selected text
        i = reWord.indexIn(qs, 0)  # index of first word if any
        if i < 0:
            return  # no words in selection, exit
        while i >= 0:
            w = reWord.cap(0)  # found word as QString
            upper = w.toUpper()
            qs.replace(i, w.size(), upper)  # replace it with UC version
            # Resume after the replacement rather than after the original
            # word: upper-casing can change the length (e.g. sharp-s -> SS).
            i = reWord.indexIn(qs, i + upper.size())
        # we have changed at least one word, replace selection with altered text
        tc.insertText(qs)
        # That wiped the selection, so restore it by "dragging" left to right
        # over the inserted text, whose length may differ from the original.
        tc.setPosition(startpos, QTextCursor.MoveAnchor)  # click
        tc.setPosition(startpos + qs.size(), QTextCursor.KeepAnchor)  # drag
        self.setTextCursor(tc)

    # to-lower is identical except for the method call.
    def toLowerCase(self):
        """Convert every word in the current selection to lower case.

        Words are found with the module-level regex reWord. Does nothing when
        there is no selection or the selection contains no word. Afterwards
        the selection is restored over the converted text so another
        operation can be applied to it.
        """
        global reWord  # the regex \b\w+\b
        tc = QTextCursor(self.textCursor())
        if not tc.hasSelection():
            return  # no selection, nothing to do
        startpos = tc.selectionStart()
        qs = QString(tc.selectedText())  # copy of selected text
        i = reWord.indexIn(qs, 0)  # index of first word if any
        if i < 0:
            return  # no words in selection, exit
        while i >= 0:
            w = reWord.cap(0)  # found word as QString
            lower = w.toLower()
            qs.replace(i, w.size(), lower)  # replace it with LC version
            # Resume after the replacement rather than after the original
            # word, in case the case mapping changed the word's length.
            i = reWord.indexIn(qs, i + lower.size())
        # we have changed at least one word, replace selection with altered text
        tc.insertText(qs)
        # That wiped the selection, so restore it by "dragging" left to right
        # over the inserted text, whose length may differ from the original.
        tc.setPosition(startpos, QTextCursor.MoveAnchor)  # click
        tc.setPosition(startpos + qs.size(), QTextCursor.KeepAnchor)  # drag
        self.setTextCursor(tc)

    # toTitle is similar but we have to change the word to lowercase (in case
    # it is uppercase now) and then change the initial character to upper.
    # Note it would be possible to write a smarter version that looked up the
    # word in a list of common adjectives, connectives, and adverbs and avoided
    # capitalizing a, and, of, by and so forth. Not gonna happen.
    def toTitleCase(self):
        """Title-case the selection: lower-case it, then capitalize each word.

        Calls toLowerCase() first, then upper-cases the initial character of
        every reWord match and restores the selection.
        """
        global reWord  # the regex \b\w+\b
        self.toLowerCase()
        cursor = QTextCursor(self.textCursor())
        if not cursor.hasSelection():
            return  # nothing selected, nothing to do
        sel_start = cursor.selectionStart()
        sel_end = cursor.selectionEnd()
        text = QString(cursor.selectedText())  # private copy of the selection
        at = reWord.indexIn(text, 0)  # offset of the first word, or -1
        if at < 0:
            return  # selection holds no words
        while at >= 0:
            word_len = reWord.cap(0).size()
            initial = text.at(at)
            text.replace(at, 1, initial.toUpper())  # capitalize first letter
            at = reWord.indexIn(text, at + word_len)  # on to the next word
        # At least one word changed: put the altered text over the selection.
        cursor.insertText(text)
        # Inserting dropped the selection; re-create it anchor-first.
        cursor.setPosition(sel_start, QTextCursor.MoveAnchor)
        cursor.setPosition(sel_end, QTextCursor.KeepAnchor)
        self.setTextCursor(cursor)

    # Re-implement the parent's keyPressEvent in order to provide some
    # special controls. (Note on Mac, "ctrl-" is "cmd-" and "alt-" is "opt-")
    # ctrl-plus increases the edit font size 1 pt
    # (n.b. ctrl-plus probably only comes from a keypad, we usually just get
    #  ctrl-shift-equals instead of plus)
    # ctrl-minus decreases the edit font size 1 pt
    # ctrl-<n> for n in 1..9 jumps the insertion point to bookmark <n>
    # ctrl-shift-<n> extends the selection to bookmark <n>
    # ctrl-alt-<n> sets bookmark n at the current position
    def keyPressEvent(self, event):
        """Handle find, zoom and bookmark keys; pass all others to the parent.

        :param event: the key event; accepted when its key code is in
            IMC.keysOfInterest, otherwise ignored and forwarded.
        """
        masked_mods = int(event.modifiers()) & IMC.keypadDeModifier
        kkey = int(masked_mods) | int(event.key())
        # Add as little overhead as possible: if it isn't ours, pass it on.
        if kkey not in IMC.keysOfInterest:
            event.ignore()
            super(PPTextEditor, self).keyPressEvent(event)
            return
        event.accept()  # this one is ours
        if kkey in IMC.findKeys:
            # ^f, ^g, etc. go straight to the Find panel.
            self.emit(SIGNAL("editKeyPress"), kkey)
        elif kkey in IMC.zoomKeys:
            # Step the point size down for ctl-minus, up otherwise.
            if kkey == IMC.ctl_minus:
                step = -1
            else:
                step = 1
            new_size = self.fontInfo().pointSize() + step
            if 3 < new_size < 65:  # keep the size within sane limits
                # Changing the size on self.font() alone has no effect; the
                # modified font has to be installed with setFont().
                font = self.font()
                font.setPointSize(new_size)
                self.setFont(font)
                IMC.fontSize = new_size  # remembered for shutdown time
        elif kkey in IMC.markKeys:
            # ctl-1..9: jump to that bookmark if it is set.
            bookmark = self.bookMarkList[kkey - IMC.ctl_1]
            if bookmark is not None:
                self.setTextCursor(bookmark)
        elif kkey in IMC.markShiftKeys:
            # shift-ctl-1..9: extend the selection from the current anchor
            # to the bookmark's position.
            mark_tc = self.bookMarkList[kkey - IMC.ctl_shft_1]
            if mark_tc is not None:
                extended = QTextCursor(self.textCursor())
                extended.setPosition(mark_tc.position(),
                                     QTextCursor.KeepAnchor)
                self.setTextCursor(extended)
        elif kkey in IMC.markSetKeys:
            # ctl-alt-1..9: remember the current cursor as a bookmark.
            slot = kkey - IMC.ctl_alt_1  # 0..8
            self.bookMarkList[slot] = QTextCursor(self.textCursor())
            IMC.needMetadataSave |= IMC.bookmarksChanged

    # Called from pqFind after doing a successful search, this method centers the
    # current selection (which is the result of the find) in the window. If the selection
    # is large, put the top of the selection higher than center but on no account
    # above the top of the viewport. Two problems arise: One, the rectangles returned
    # by .cursorRect() and by .viewport().geometry() are in pixel units, while the
    # vertical scrollbar is sized in logical text lines. So we work out the adjustment
    # as a fraction of the viewport, times the scrollbar's pageStep value to get lines.
    # Two, cursorRect gives only the height of the actual cursor, not of the selected
    # text. To find out the height of the full selection we have to get a cursorRect
    # for the start of the selection, and another for the end of it.
    def centerCursor(self):
        """Scroll vertically so the current selection is centered in the view.

        A selection shorter than half the viewport has its top centered; one
        that still fits in the viewport is centered as a whole; a taller one
        is placed with its top one line below the viewport top. The pixel
        offset is converted to scrollbar units as a fraction of the viewport
        height times the scrollbar's pageStep. Nothing is scrolled when the
        resulting target lies outside 0..maximum, or when the viewport has
        no height.
        """
        tc = QTextCursor(
            self.textCursor())  # copy the working cursor with its selection
        top_point = tc.position()  # one end of selection, in character units
        bot_point = tc.anchor()  # ..and the other end
        if top_point > bot_point:  # often the position is > the anchor
            (top_point, bot_point) = (bot_point, top_point)
        tc.setPosition(top_point)  # cursor for the top of the selection
        selection_top = self.cursorRect(tc).top()  # ..get its top pixel
        line_height = self.cursorRect(
            tc).height()  # and save height of one line
        tc.setPosition(bot_point)  # cursor for the end of the selection
        selection_bot = self.cursorRect(
            tc).bottom()  # ..selection's bottom pixel
        selection_height = selection_bot - selection_top + 1  # selection height in pixels
        view_height = self.viewport().geometry().height(
        )  # scrolled area's height in px
        if view_height <= 0:
            # No visible area to center in; also avoids dividing by zero below.
            return
        view_half = view_height >> 1  # int(view_height/2)
        pixel_adjustment = 0
        if selection_height < view_half:
            # selected text is less than half the window height: center the top of the
            # selection, i.e., make the cursor_top equal to view_half.
            pixel_adjustment = selection_top - view_half  # may be negative
        else:
            # selected text is taller than half the window, can we show it all?
            if selection_height < (view_height - line_height):
                # all selected text fits in the viewport (with a little free): center it.
                pixel_adjustment = (selection_top +
                                    (selection_height / 2)) - view_half
            else:
                # not all selected text fits the window, put text top near window top
                pixel_adjustment = selection_top - line_height
        # OK, convert the pixel adjustment to a line-adjustment based on the assumption
        # that a scrollbar pageStep is the height of the viewport in lines.
        # Force float division: int / int floors under Python 2's classic
        # division, which would turn a small negative offset into a full page.
        adjust_fraction = float(pixel_adjustment) / view_height
        vscroller = self.verticalScrollBar()
        page_step = vscroller.pageStep(
        )  # lines in a viewport page, actually less 1
        adjust_lines = int(page_step * adjust_fraction)
        target = vscroller.value() + adjust_lines
        if (target >= 0) and (target <= vscroller.maximum()):
            vscroller.setValue(target)

    # Catch the contextMenu event and extend the standard context menu with
    # a separator and the option to add a word to good-words, but only when
    # there is a selection and it encompasses just one word.
    def contextMenuEvent(self, event):
        """Show the standard context menu, extended for a one-word selection.

        When there is a selection and self.oneWordRE matches it at offset 0,
        the captured word is saved in self.menuWord and a separator plus a
        "<word> -> Goodwords" action, connected to addToGW, is appended.

        :param event: the context-menu event; its globalPos() places the menu.
        """
        ctx_menu = self.createStandardContextMenu()
        # hasSelection is a method and must be called; the bare bound method
        # is always truthy.
        if self.textCursor().hasSelection():
            qs = self.textCursor().selectedText()
            if 0 == self.oneWordRE.indexIn(
                    qs):  # it matches at 0 or not at all
                self.menuWord = self.oneWordRE.cap(1)  # save the word
                ctx_menu.addSeparator()
                gw_name = QString(self.menuWord)  # make a copy
                gw_action = ctx_menu.addAction(
                    gw_name.append(QString(u' -> Goodwords')))
                self.connect(gw_action, SIGNAL("triggered()"), self.addToGW)
        ctx_menu.exec_(event.globalPos())

    # This slot receives the "someword -> good_words" context menu action
    def addToGW(self):
        """Slot: insert the saved context-menu word into the good-words list."""
        chosen_word = self.menuWord
        IMC.goodWordList.insert(chosen_word)
        # Mark that a spell check is needed and the metadata must be saved.
        IMC.needSpellCheck = True
        IMC.needMetadataSave |= IMC.goodwordsChanged
        IMC.mainWindow.setWinModStatus()

    # Implement save: the main window opens the files for output using
    # QIODevice::WriteOnly, which wipes the contents (contrary to the doc)
    # so we need to write the document and metadata regardless of whether
    # they've been modified. However we avoid rebuilding metadata if we can.
    def save(self, dataStream, metaStream):
        """Write the document text and its metadata to the given streams.

        :param dataStream: stream that receives the document text.
        :param metaStream: stream handed to writeMetadata().
        """
        # The whole document as a QString.
        doc_text = self.toPlainText()
        # SHA-1 over the document; the repr of the digest goes into both
        # hash fields of the IMC.
        hasher = self.cuisineart
        hasher.reset()
        hasher.addData(doc_text)
        digest_repr = repr(bytes(hasher.result()))
        IMC.metaHash = digest_repr
        IMC.documentHash = digest_repr
        # Writing the document is simple in the QStream world.
        dataStream << doc_text
        dataStream.flush()
        # The metadata is written as-is, without rebuilding it first.
        self.writeMetadata(metaStream)
        metaStream.flush()
        IMC.needMetadataSave = 0x00
        # Clearing the modified flag triggers main.setWinModStatus().
        self.document().setModified(False)

    def writeMetadata(self, metaStream):
        """Write all metadata sections to metaStream in {{SECTION}} form.

        Emits, in order: VERSION, ENCODING, STALECENSUS, NEEDSPELLCHECK,
        MAINDICT, DOCHASH, then PAGETABLE, CHARCENSUS and WORDCENSUS (each
        only when non-empty), BOOKMARKS, NOTES, GOODWORDS and BADWORDS (each
        only when active) and finally CURSOR. Flushes the stream at the end.

        :param metaStream: output stream written to with the << operator.
        """
        # Writing the metadata takes a bit more work.
        # pageTable goes out between {{PAGETABLE}}..{{/PAGETABLE}}
        metaStream << u"{{VERSION 0}}\n"  # meaningless at the moment
        metaStream << u"{{ENCODING "
        metaStream << unicode(IMC.bookSaveEncoding)
        metaStream << u"}}\n"
        metaStream << u"{{STALECENSUS "
        if 0 == IMC.staleCensus:
            metaStream << u"FALSE"
        else:
            metaStream << u"TRUE"
        metaStream << u"}}\n"
        metaStream << u"{{NEEDSPELLCHECK "
        if 0 == IMC.needSpellCheck:
            metaStream << u"FALSE"
        else:
            metaStream << u"TRUE"
        metaStream << u"}}\n"
        metaStream << u"{{MAINDICT "
        metaStream << unicode(IMC.bookMainDict)
        metaStream << u"}}\n"
        # The hash could contain any character. Using __repr__ ensured
        # it is enclosed in balanced single or double quotes but to be
        # double sure we will fence it in characters we can spot with a regex.
        metaStream << u"{{DOCHASH " + IMC.documentHash + u" }}\n"
        if IMC.pageTable.size():
            metaStream << u"{{PAGETABLE}}\n"
            for i in range(IMC.pageTable.size()):
                metaStream << IMC.pageTable.metaStringOut(i)
            metaStream << u"{{/PAGETABLE}}\n"
        # The census lines use unicode format strings: formatting a unicode
        # argument into a byte-string template makes Python 2 encode it as
        # ASCII, which raises UnicodeEncodeError for any non-ASCII key.
        if IMC.charCensus.size():
            metaStream << u"{{CHARCENSUS}}\n"
            for i in range(IMC.charCensus.size()):
                (w, n, f) = IMC.charCensus.get(i)
                metaStream << u"{0} {1} {2}\n".format(unicode(w), n, f)
            metaStream << u"{{/CHARCENSUS}}\n"
        if IMC.wordCensus.size():
            metaStream << u"{{WORDCENSUS}}\n"
            for i in range(IMC.wordCensus.size()):
                (w, n, f) = IMC.wordCensus.get(i)
                metaStream << u"{0} {1} {2}\n".format(unicode(w), n, f)
            metaStream << u"{{/WORDCENSUS}}\n"
        metaStream << u"{{BOOKMARKS}}\n"
        for i in range(9):  # 0..8
            if self.bookMarkList[i] is not None:
                # slot number, cursor position, cursor anchor
                metaStream << u"{0} {1} {2}\n".format(
                    i, self.bookMarkList[i].position(),
                    self.bookMarkList[i].anchor())
        metaStream << u"{{/BOOKMARKS}}\n"
        metaStream << u"{{NOTES}}\n"
        d = IMC.notesEditor.document()
        if not d.isEmpty():
            for i in range(d.blockCount()):
                t = d.findBlockByNumber(i).text()
                if t.startsWith("{{"):
                    # NOTE(review): u"\xfffd" is three characters (U+00FF,
                    # 'f', 'd'), not U+FFFD as the intent appears to be. It
                    # is left unchanged here because loadMetadata tests for
                    # this same literal; the two must be corrected together.
                    t.prepend(u"\xfffd")  # meant as the Replacement char
                metaStream << t + "\n"
            IMC.notesEditor.document().setModified(False)
        metaStream << u"{{/NOTES}}\n"
        if IMC.goodWordList.active():  # have some good words
            metaStream << u"{{GOODWORDS}}\n"
            IMC.goodWordList.save(metaStream)
            metaStream << u"{{/GOODWORDS}}\n"
        if IMC.badWordList.active():  # have some bad words
            metaStream << u"{{BADWORDS}}\n"
            IMC.badWordList.save(metaStream)
            metaStream << u"{{/BADWORDS}}\n"
        p1 = self.textCursor().selectionStart()
        p2 = self.textCursor().selectionEnd()
        metaStream << u"{{CURSOR " + unicode(p1) + u' ' + unicode(p2) + u"}}\n"
        metaStream.flush()

    # Implement load: the main window has the job of finding and opening files
    # then passes QTextStreams ready to read here. If metaStream is None,
    # no metadata file was found and we construct the metadata.
    # n.b. before main calls here, it calls our .clear, hence lists are
    # empty, hiliting is off, etc.

    def load(self, dataStream, metaStream, goodStream, badStream):
        """Load a book from dataStream and obtain its metadata.

        :param dataStream: stream whose whole content becomes the document.
        :param metaStream: metadata stream, or None to build metadata anew.
        :param goodStream: good-words stream or None; used only when
            metaStream is None.
        :param badStream: bad-words stream or None; used only when
            metaStream is None.
        """
        # Put the document file into the editor.
        self.setPlainText(dataStream.readAll())
        # Hash the document. Both hash fields start out equal and stay so
        # unless the metadata file supplies a different value.
        hasher = self.cuisineart
        hasher.reset()
        hasher.addData(self.toPlainText())
        digest_repr = repr(bytes(hasher.result()))
        IMC.metaHash = digest_repr
        IMC.documentHash = digest_repr
        if metaStream is not None:
            self.loadMetadata(metaStream)
        else:
            # No metadata file: load goodwords and badwords, then take a
            # census that builds the page table and vocabulary from scratch.
            if goodStream is not None:
                IMC.goodWordList.load(goodStream)
            if badStream is not None:
                IMC.badWordList.load(badStream)
            self.rebuildMetadata(page=True)
        # A disagreement now means the metadata carried a DOCHASH for a
        # different book or version. Warn the user.
        if IMC.metaHash != IMC.documentHash:
            pqMsgs.warningMsg(
                u"The document file and metadata file do not match!",
                u"Bookmarks, page breaks and other metadata will be wrong! Strongly recommend you not edit or save this book."
            )
        # Restore highlighting if the user wanted it. This can be slow for a
        # large book, but keeps us in step with the View menu toggles.
        want_hilite = IMC.scannoHiliteSwitch or IMC.spellingHiliteSwitch
        self.setHighlight(want_hilite)
        # Use a different main dictionary if the metadata named one.
        if IMC.bookMainDict is not None:
            IMC.spellCheck.setMainDict(IMC.bookMainDict)

    # load page table & vocab from the .meta file as a stream.
    # n.b. QString has a split method we could use but instead
    # we take the input line to a Python u-string and split it. For
    # the word/char census we have to take the key back to a QString.
    def loadMetadata(self, metaStream):
        sectionRE = QRegExp( u"\{\{(" + '|'.join (
            ['PAGETABLE','CHARCENSUS','WORDCENSUS','BOOKMARKS',
             'NOTES','GOODWORDS','BADWORDS','CURSOR','VERSION',
             'STALECENSUS','NEEDSPELLCHECK','ENCODING', 'DOCHASH', 'MAINDICT'] ) \
                             + u")(.*)\}\}",
            Qt.CaseSensitive)
        metaVersion = 0  # base version
        while not metaStream.atEnd():
            qline = metaStream.readLine().trimmed()
            if qline.isEmpty(): continue  # allow blank lines between sections
            if sectionRE.exactMatch(qline):  # section start
                section = sectionRE.cap(1)
                argument = unicode(sectionRE.cap(2).trimmed())
                endsec = QString(u"{{/" + section + u"}}")
                if section == u"VERSION":
                    if len(argument) != 0:
                        metaVersion = int(argument)
                    continue  # no more data after {{VERSION x }}
                elif section == u"STALECENSUS":
                    if argument == u"TRUE":
                        IMC.staleCensus = IMC.staleCensusLoaded
                    continue  # no more data after {{STALECENSUS x}}
                elif section == u"NEEDSPELLCHECK":
                    if argument == u"TRUE":
                        IMC.needSpellCheck = True
                    continue  # no more data after {{NEEDSPELLCHECK x}}
                elif section == u"ENCODING":
                    IMC.bookSaveEncoding = QString(argument)
                    continue
                elif section == u"MAINDICT":
                    IMC.bookMainDict = QString(argument)
                    continue
                elif section == u"DOCHASH":
                    IMC.metaHash = argument
                    continue
                elif section == u"PAGETABLE":
                    qline = metaStream.readLine()
                    while (not qline.startsWith(endsec)) and (
                            not qline.isEmpty()):
                        IMC.pageTable.metaStringIn(qline)
                        qline = metaStream.readLine()
                    continue
                elif section == u"CHARCENSUS":
                    qline = metaStream.readLine()
                    while (not qline.startsWith(endsec)) and (
                            not qline.isEmpty()):
                        # can't just .split the char census, the first
                        # char is the char being counted and it can be a space.
                        str = unicode(qline)
                        parts = str[2:].split(' ')
                        IMC.charCensus.append(QString(str[0]), int(parts[0]),
                                              int(parts[1]))
                        qline = metaStream.readLine()
                    continue
                elif section == u"WORDCENSUS":
                    qline = metaStream.readLine()
                    while (not qline.startsWith(endsec)) and (
                            not qline.isEmpty()):
                        parts = unicode(qline).split(' ')
                        IMC.wordCensus.append(QString(parts[0]), int(parts[1]),
                                              int(parts[2]))
                        qline = metaStream.readLine()
                    continue
                elif section == u"BOOKMARKS":
                    qline = metaStream.readLine()
                    while (not qline.startsWith(endsec)) and (
                            not qline.isEmpty()):
                        parts = unicode(qline).split(' ')
                        tc = QTextCursor(self.document())
                        tc.setPosition(int(parts[1]))
                        if len(parts
                               ) == 3:  # early versions didn't save anchor
                            tc.movePosition(int(parts[2]),
                                            QTextCursor.KeepAnchor)
                        self.bookMarkList[int(parts[0])] = tc
                        qline = metaStream.readLine()
                    continue
                elif section == u"NOTES":
                    e = IMC.notesEditor
                    e.setUndoRedoEnabled(False)
                    qline = metaStream.readLine()
                    while (not qline.startsWith(endsec)
                           ) and not metaStream.atEnd():
                        if qline.startsWith(u"\xfffd"):  # escaped {{
                            qline.remove(0, 1)
                        e.appendPlainText(qline)
                        qline = metaStream.readLine()
                    e.setUndoRedoEnabled(True)
                    continue
                elif section == u"GOODWORDS":
                    # not going to bother checking for endsec return,
                    # if it isn't that then we will shortly fail anyway
                    w = IMC.goodWordList.load(metaStream, endsec)
                    continue
                elif section == u"BADWORDS":
                    w = IMC.badWordList.load(metaStream, endsec)
                    continue
                elif section == u"CURSOR":  # restore selection as of save
                    p1p2 = argument.split(' ')
                    tc = QTextCursor(self.document())
                    tc.setPosition(int(p1p2[0]), QTextCursor.MoveAnchor)
                    tc.setPosition(int(p1p2[1]), QTextCursor.KeepAnchor)
                    self.setTextCursor(tc)
                else:
                    # this can't happen; section is text captured by the RE
                    # and we have accounted for all possibilities
                    raise AssertionError, "impossible metadata"
            else:  # Non-blank line that doesn't match sectionRE?
                pqMsgs.infoMsg(
                    "Unexpected line in metadata: {0}".format(
                        pqMsgs.trunc(qline, 20)),
                    "Metadata may be incomplete, suggest quit")
                break

    # Rebuild as much of the char/word census and spellcheck as we need to.
    # This is called from load, above, and from the Char and Word panels
    # Refresh buttons. If page=True we are loading a doc for which there is
    # no metadata file, so cache page definitions; otherwise just skip the
    # page definitions (see doCensus). If the doc has changed we need to
    # rerun the full char/word census. But if not, we might still need a
    # spellcheck, if the dictionary has changed.
    def rebuildMetadata(self, page=False):
        """Bring the census and spellcheck data up to date, doing only
        the work that is actually needed.

        :param page: passed through to doCensus; a true value also forces
            the census to run regardless of IMC.staleCensus.
        """
        census_needed = page or (IMC.staleCensus != 0)
        if census_needed:
            self.doCensus(page)
        # Tested after the census on purpose: doCensus sets needSpellCheck,
        # and a spellcheck can also be pending without any census rerun.
        if IMC.needSpellCheck:
            self.doSpellcheck()

    # Go through vocabulary census and check the spelling (it would be a big
    # waste of time to check every word as it was read). If the spellcheck
    # is not up (i.e. it couldn't find a dictionary) we only mark as bad the
    # words in the badwords list.
    def doSpellcheck(self):
        """Recompute the misspelt flag of every entry in IMC.wordCensus.

        For each word: a hit in the good-words list leaves the flag off, a
        hit in the bad-words list turns it on, and otherwise the word is
        passed to the spellchecker (with its optional "/dict-tag") only if
        the spellchecker reports that it is up. When done, needSpellCheck
        is cleared and, if spelling highlights are switched on, the
        highlighting is refreshed.
        """
        speller_up = IMC.spellCheck.isUp()
        nwords = IMC.wordCensus.size()
        if nwords <= 0:  # a null document has no words to check
            return
        pqMsgs.startBar(nwords, "Checking spelling...")
        # Mask that drops the misspelt bit from the low-order flag byte.
        keep_mask = 0xff - IMC.WordMisspelt
        for index in range(IMC.wordCensus.size()):
            qword, _count, flags = IMC.wordCensus.get(index)
            flags = flags & keep_mask  # start from "not misspelt"
            # A census word may carry a "/dict-tag" suffix; separate the
            # bare word from the tag (the tag is "" when absent).
            word, _slash, dict_tag = unicode(qword).partition("/")
            if IMC.goodWordList.check(word):
                misspelt = False
            elif IMC.badWordList.check(word):
                misspelt = True
            else:
                # Only consult the dictionary when a spellchecker is up.
                misspelt = speller_up and not IMC.spellCheck.check(
                    word, dict_tag)
            if misspelt:
                flags |= IMC.WordMisspelt
            IMC.wordCensus.setflags(index, flags)
            if (index & 0x1f) == 0:  # advance the bar every 32nd word
                pqMsgs.rollBar(index)
        pqMsgs.endBar()
        IMC.needMetadataSave |= IMC.wordlistsChanged
        IMC.needSpellCheck = False
        if IMC.spellingHiliteSwitch:
            # Force the spelling underlines to be redrawn from the new flags.
            self.setHighlight(True)

    # Scan the successive lines of the document and build the census of chars,
    # words, and (first time only) the table of page separators.
    #
    # If this is an HTML file (from IMC.bookType), and if its first line is
    # <!DOCTYPE..., we skip until we see <body>. This avoids polluting our
    # char and word censuses with CSS comments, etc. Regular HTML tags
    # like <table> and <b> are skipped over automatically during parsing.
    #
    # Qt obligingly supplies each line as a QTextBlock. We examine the line
    # to see if it is a page separator. If we are opening a file having no
    # metadata, the Page argument is True and we build a page table entry.
    # Other times (e.g. from the Refresh button of the Word or Char panel),
    # we skip over page separator lines.

    # Each non-separator line is first scanned by characters and then for words.
    # The character scan counts characters for the Chars panel. We do NOT parse
    # the text for PGDP productions [oe] and [OE] nor other markups for accented
    # characters such as [=o] for o-with-macron or [^a] for a-with-circumflex.
    # These are just counted as [, o, e, ]. Reasons: (1) the alternative, to parse
    # them into their proper unicode values and count those, entails a whole lotta
    # code that would slow this census badly; (2) having the unicode chars in
    # the Chars panel would be confusing when they are not actually in the text;
    # (3) there is some value in having the counts of [ and ]. For similar reasons
    # we count all the chars in HTML e.g. "<i>" is three characters even though it
    # is effectively unprinted metadata.

    # In scanning words, we collect numbers as words. We collect internal hyphens
    # as letters ("mother-in-law") but not at end of word ("help----" or emdash).
    # We collect internal apostrophes ("it's", "hadn't") but not apostrophes at ends,
    # "'Twas" is counted as "Twas", "students' work" as "students work". This is because
    # there seems to be no way to distinguish the contractive prefix ('Twas)
    # and the final possessive (students') from normal single-quote marks!
    # And we collect leading and internal, but not trailing, square brackets as
    # letters. Thus [OE]dipus and ma[~n]ana are words (but will fail spellcheck)
    # while Einstein[A] (a footnote key) is not.

    # We also collect HTML productions ("</i>" and "<table>") as words. They do not
    # go in the census but we check them for lang= attributes and set the alternate
    # spellcheck dictionary from them.

    def doCensus(self, page=False):
        """Rebuild the character and word censuses from the document text.

        Clears IMC.wordCensus and IMC.charCensus, then visits the document
        one text block (line) at a time. Lines matching reLineSep are page
        separators; all other lines have every character counted and every
        reTokens match examined as either an HTML production or a word.
        On completion IMC.needSpellCheck is set True, IMC.staleCensus is
        set to 0, and IMC.needMetadataSave is or-ed with
        IMC.wordlistsChanged.

        :param page: when true, each page-separator line is stored in
            IMC.pageTable with a cursor at its start; when false, separator
            lines are simply skipped.
        """
        # Module-level regexes/constants; they are only read here (as are
        # qcHyphen, qcApostrophe, qcCurlyApostrophe and reDigit below).
        global reLineSep, reTokens, reLang, qcLess
        # Clear the current census values
        IMC.wordCensus.clear()
        IMC.charCensus.clear()
        # Count chars locally for speed
        local_char_census = defaultdict(int)
        # Name of current alternate dictionary
        alt_dict = QString()  # isEmpty when none
        # Tag from which we set an alternate dict
        alt_dict_tag = QString()
        # Start the progress bar based on the number of lines in the document
        pqMsgs.startBar(self.document().blockCount(),
                        "Counting words and chars...")
        # Find the first text block of interest, skipping an HTML header:
        # if the book type starts with "htm" and line one is a DOCTYPE,
        # advance to the line that starts with <body (or to the end).
        qtb = self.document().begin()  # first text block
        if IMC.bookType.startsWith(QString(u"htm")) \
        and qtb.text().startsWith(QString(u"<!DOCTYPE")) :
            while (qtb != self.document().end()) \
            and (not qtb.text().startsWith(QString(u"<body"))) :
                qtb = qtb.next()
        # Scan all lines of the document to the end.
        while qtb != self.document().end():
            qsLine = qtb.text()  # text of line as qstring
            if reLineSep.exactMatch(qsLine):  # this is a page separator line
                if page:
                    # We are doing page seps, it's for Open with no .meta seen,
                    # the page table has been cleared. Store the page sep
                    # data in the page table, with a textCursor to its start.
                    qsfilenum = reLineSep.cap(1)  # xxx from "File: xxx.png"
                    qsproofers = reLineSep.cap(2)  # \who\x\blah\etc
                    # proofer names can contain spaces, replace with en-space char
                    qsproofers.replace(QChar(" "), QChar(0x2002))
                    # create a new TextCursor instance
                    tcursor = QTextCursor(self.document())
                    # point it to this text block
                    tcursor.setPosition(qtb.position())
                    # dump all that in the page table
                    IMC.pageTable.loadPsep(tcursor, qsfilenum, qsproofers)
                # else not doing pages, just ignore this psep line
            else:  # not psep, ordinary text line, count chars and words
                pyLine = unicode(qsLine)  # move into Python space to count
                for c in pyLine:
                    local_char_census[c] += 1
                # Walk the line token by token; j is the offset at which
                # the next reTokens search starts.
                j = 0
                while True:
                    j = reTokens.indexIn(qsLine, j)
                    if j < 0:  # no more word-like units
                        break
                    qsWord = reTokens.cap(0)
                    j += qsWord.size()  # resume just past this token
                    if qsWord.startsWith(qcLess):
                        # Examine a captured HTML production.
                        if not reTokens.cap(2).isEmpty():
                            # HTML open tag, look for lang='dict'
                            if 0 <= reLang.indexIn(reTokens.cap(3)):
                                # found it: save tag and dict name
                                alt_dict_tag = QString(reTokens.cap(2))
                                alt_dict = QString(reLang.cap(1))
                                alt_dict.prepend(u'/')  # make "/en_GB"
                            # else no lang= attribute
                        else:
                            # HTML close tag, see if it closes alt dict use
                            if reTokens.cap(5) == alt_dict_tag:
                                # yes, matches open-tag for dict, clear it
                                alt_dict_tag = QString()
                                alt_dict = QString()
                            # else no alt dict in use, or didn't match
                    else:  # did not start with "<", process as a word
                        # Set the property flags, which is harder now we don't
                        # look at every character. Use the QString facilities
                        # rather than python because python .isalnum fails
                        # for a hyphenated number "1850-1910".
                        flag = 0
                        # Differs from its lowercase form => has an uppercase
                        if 0 != qsWord.compare(qsWord.toLower()):
                            flag |= IMC.WordHasUpper
                        # Differs from its uppercase form => has a lowercase
                        if 0 != qsWord.compare(qsWord.toUpper()):
                            flag |= IMC.WordHasLower
                        if qsWord.contains(qcHyphen):
                            flag |= IMC.WordHasHyphen
                        if qsWord.contains(qcApostrophe) or qsWord.contains(
                                qcCurlyApostrophe):
                            flag |= IMC.WordHasApostrophe
                        if qsWord.contains(reDigit):
                            flag |= IMC.WordHasDigit
                        # The word is counted with the current "/dict" tag
                        # appended (nothing is added when alt_dict is empty).
                        IMC.wordCensus.count(qsWord.append(alt_dict), flag)
                # end "while any more words in this line"
            # end of not-a-psep-line processing
            qtb = qtb.next()  # move on to next block
            if (0 == (qtb.blockNumber() & 255)):  #every 256th block
                pqMsgs.rollBar(qtb.blockNumber())  # roll the bar
                QApplication.processEvents()
        # end of scanning all text blocks in the doc
        pqMsgs.endBar()
        # we accumulated the char counts in local_char_census. Now read it out
        # in sorted order and stick it in the IMC.charCensus list.
        for one_char in sorted(local_char_census):
            qc = QChar(ord(one_char))  # get to QChar for category() method
            IMC.charCensus.append(QString(qc), local_char_census[one_char],
                                  qc.category())
        IMC.needSpellCheck = True  # after a census this is true
        IMC.staleCensus = 0  # but this is no longer true
        IMC.needMetadataSave |= IMC.wordlistsChanged