Exemple #1
0
 def doSpellcheck(self):
     """Recompute the misspelt flag of every entry in the word census.

     For each census entry the IMC.WordMisspelt bit is first cleared. It is
     set again when the word is in the bad-word list, or when the word is
     in neither word list and the spellchecker (if it is up) rejects it.
     Words found in the good-word list are never flagged. A word may carry
     a "/dict-tag" suffix; the part after the slash is handed to the
     spellchecker as the dictionary to use.

     When done, IMC.needSpellCheck is cleared and, if spelling highlights
     are switched on, the highlighting is refreshed.
     """
     speller_ready = IMC.spellCheck.isUp()
     nwords = IMC.wordCensus.size()
     if nwords <= 0:
         # A null document can have an empty census: nothing to check.
         return
     pqMsgs.startBar(nwords, "Checking spelling...")
     # Mask that keeps every flag bit of the low byte except WordMisspelt.
     keep_mask = 0xff - IMC.WordMisspelt
     for index in range(IMC.wordCensus.size()):
         qword, _count, flags = IMC.wordCensus.get(index)
         flags = flags & keep_mask
         # Split "word/dict-tag" into the word and its tag ("" if absent).
         word, _slash, dict_tag = unicode(qword).partition("/")
         if IMC.goodWordList.check(word):
             misspelt = False
         elif IMC.badWordList.check(word):
             misspelt = True
         else:
             # Only consult the speller when it is available.
             misspelt = speller_ready and not IMC.spellCheck.check(word, dict_tag)
         if misspelt:
             flags |= IMC.WordMisspelt
         IMC.wordCensus.setflags(index, flags)
         if index % 32 == 0:
             # Advance the progress bar once every 32 words.
             pqMsgs.rollBar(index)
     pqMsgs.endBar()
     IMC.needMetadataSave |= IMC.wordlistsChanged
     IMC.needSpellCheck = False
     if IMC.spellingHiliteSwitch:
         # Force a refresh of the spelling underlines.
         self.setHighlight(True)
Exemple #2
0
 def doSpellcheck(self):
     """Refresh the IMC.WordMisspelt flag on all words of the census.

     Every census entry has its misspelt bit turned off, then turned back
     on if the word is in the bad-word list, or if it is in neither the
     good nor the bad list and the available spellchecker does not accept
     it. The text after a "/" in a census word is passed to the
     spellchecker as the dictionary tag.

     Finishes by clearing IMC.needSpellCheck and, when the spelling
     highlight switch is on, re-applying the highlight.
     """
     have_speller = IMC.spellCheck.isUp()
     nwords = IMC.wordCensus.size()
     if nwords <= 0:
         # Could be zero in a null document.
         return
     pqMsgs.startBar(nwords, "Checking spelling...")
     # All low-byte flag bits except the misspelt bit.
     clear_misspelt = 0xff - IMC.WordMisspelt
     for position in range(IMC.wordCensus.size()):
         census_word, _occurrences, word_flags = IMC.wordCensus.get(position)
         word_flags = word_flags & clear_misspelt
         # Some words have a /dict-tag; separate it (empty string if none).
         text, _separator, tag = unicode(census_word).partition("/")
         if IMC.goodWordList.check(text):
             is_bad = False
         elif IMC.badWordList.check(text):
             is_bad = True
         else:
             # Check the word in its optional dictionary, if we can spell.
             is_bad = have_speller and not IMC.spellCheck.check(text, tag)
         if is_bad:
             word_flags |= IMC.WordMisspelt
         IMC.wordCensus.setflags(position, word_flags)
         if (position & 0x1f) == 0:
             # Roll the progress bar on every 32nd word.
             pqMsgs.rollBar(position)
     pqMsgs.endBar()
     IMC.needMetadataSave |= IMC.wordlistsChanged
     IMC.needSpellCheck = False
     if IMC.spellingHiliteSwitch:
         # Force refresh of spell underlines.
         self.setHighlight(True)
Exemple #3
0
 def doCensus(self, page=False):
     """Rebuild the word and character censuses from the document text.

     Clears IMC.wordCensus and IMC.charCensus, then walks the text blocks
     of self.document():

     * A line matching reLineSep is a page separator and is never counted.
       When ``page`` is true, its two captured fields (file number and
       proofer string) are loaded into IMC.pageTable together with a
       QTextCursor positioned at the start of that block.
     * On any other line every character is tallied, and every reTokens
       match is either examined as HTML markup (token starts with qcLess)
       or counted as a word along with its property flags.

     An HTML open tag whose attributes match reLang selects an alternate
     dictionary: "/<dict>" is appended to each following word until a
     close tag with the same tag name is seen.

     When the book type starts with "htm" and the first line starts with
     "<!DOCTYPE", all lines before the first one starting with "<body" are
     skipped.

     On exit IMC.needSpellCheck is set, IMC.staleCensus is zeroed and
     IMC.wordlistsChanged is OR-ed into IMC.needMetadataSave.

     :param page: when true, also record page-separator lines in
         IMC.pageTable (used when opening a file with no .meta seen).
     """
     global reLineSep, reTokens, reLang, qcLess
     # Clear the current census values
     IMC.wordCensus.clear()
     IMC.charCensus.clear()
     # Count chars locally for speed
     local_char_census = defaultdict(int)
     # Name of current alternate dictionary
     alt_dict = QString()  # isEmpty when none
     # Tag from which we set an alternate dict
     alt_dict_tag = QString()
     # Start the progress bar based on the number of lines in the document
     pqMsgs.startBar(self.document().blockCount(),
                     "Counting words and chars...")
     # Find the first text block of interest, skipping an HTML header file
     qtb = self.document().begin()  # first text block
     if (IMC.bookType.startsWith(QString(u"htm"))
             and qtb.text().startsWith(QString(u"<!DOCTYPE"))):
         while (qtb != self.document().end()
                and not qtb.text().startsWith(QString(u"<body"))):
             qtb = qtb.next()
     # Scan all lines of the document to the end.
     while qtb != self.document().end():
         qsLine = qtb.text()  # text of line as qstring
         if reLineSep.exactMatch(qsLine):  # this is a page separator line
             if page:
                 # We are doing page seps, it's for Open with no .meta seen,
                 # the page table has been cleared. Store the page sep
                 # data in the page table, with a textCursor to its start.
                 qsfilenum = reLineSep.cap(1)  # xxx from "File: xxx.png"
                 qsproofers = reLineSep.cap(2)  # \who\x\blah\etc
                 # proofer names can contain spaces, replace with en-space
                 # (QString.replace modifies qsproofers in place)
                 qsproofers.replace(QChar(" "), QChar(0x2002))
                 # create a new TextCursor instance
                 tcursor = QTextCursor(self.document())
                 # point it to this text block
                 tcursor.setPosition(qtb.position())
                 # dump all that in the page table
                 IMC.pageTable.loadPsep(tcursor, qsfilenum, qsproofers)
             # else not doing pages, just ignore this psep line
         else:  # not psep, ordinary text line, count chars and words
             pyLine = unicode(qsLine)  # move into Python space to count
             for c in pyLine:
                 local_char_census[c] += 1
             j = 0
             while True:
                 # indexIn also sets the captures read through cap() below,
                 # so the cap() calls must follow it before the next search.
                 j = reTokens.indexIn(qsLine, j)
                 if j < 0:  # no more word-like units
                     break
                 qsWord = reTokens.cap(0)
                 j += qsWord.size()
                 if qsWord.startsWith(qcLess):
                     # Examine a captured HTML production.
                     if not reTokens.cap(2).isEmpty():
                         # HTML open tag, look for lang='dict'
                         if 0 <= reLang.indexIn(reTokens.cap(3)):
                             # found it: save tag and dict name
                             alt_dict_tag = QString(reTokens.cap(2))
                             alt_dict = QString(reLang.cap(1))
                             alt_dict.prepend(u'/')  # make "/en_GB"
                         # else no lang= attribute
                     else:
                         # HTML close tag, see if it closes alt dict use
                         if reTokens.cap(5) == alt_dict_tag:
                             # yes, matches open-tag for dict, clear it
                             alt_dict_tag = QString()
                             alt_dict = QString()
                         # else no alt dict in use, or didn't match
                 else:  # did not start with "<", process as a word
                     # Set the property flags, which is harder now we don't
                     # look at every character. Use the QString facilities
                     # rather than python because python .isalnum fails
                     # for a hyphenated number "1850-1910".
                     flag = 0
                     if 0 != qsWord.compare(qsWord.toLower()):
                         flag |= IMC.WordHasUpper
                     if 0 != qsWord.compare(qsWord.toUpper()):
                         flag |= IMC.WordHasLower
                     if qsWord.contains(qcHyphen):
                         flag |= IMC.WordHasHyphen
                     if (qsWord.contains(qcApostrophe)
                             or qsWord.contains(qcCurlyApostrophe)):
                         flag |= IMC.WordHasApostrophe
                     if qsWord.contains(reDigit):
                         flag |= IMC.WordHasDigit
                     # append() tacks the "/dict" tag (possibly empty) onto
                     # the word before it is counted.
                     IMC.wordCensus.count(qsWord.append(alt_dict), flag)
             # end "while any more words in this line"
         # end of not-a-psep-line processing
         qtb = qtb.next()  # move on to next block
         if 0 == (qtb.blockNumber() & 255):  # every 256th block
             pqMsgs.rollBar(qtb.blockNumber())  # roll the bar
             QApplication.processEvents()
     # end of scanning all text blocks in the doc
     pqMsgs.endBar()
     # We accumulated the char counts in local_char_census. Now read it out
     # in sorted order and stick it in the IMC.charCensus list.
     for one_char in sorted(local_char_census.keys()):
         qc = QChar(ord(one_char))  # get to QChar for category() method
         IMC.charCensus.append(QString(qc), local_char_census[one_char],
                               qc.category())
     IMC.needSpellCheck = True  # after a census this is true
     IMC.staleCensus = 0  # but this is no longer true
     IMC.needMetadataSave |= IMC.wordlistsChanged
Exemple #4
0
 def loadStarts(self):
     """Start the progress bar shown while HTML is loading."""
     # NOTE(review): 100 is presumably a percentage scale -- confirm
     # against pqMsgs.startBar.
     bar_maximum = 100
     bar_caption = "Loading HTML"
     pqMsgs.startBar(bar_maximum, bar_caption)
Exemple #5
0
 def loadStarts(self):
     """Open the "Loading HTML" progress bar when a load begins."""
     # NOTE(review): the limit of 100 looks like a percent range; verify
     # with the pqMsgs.startBar contract.
     progress_limit = 100
     progress_label = "Loading HTML"
     pqMsgs.startBar(progress_limit, progress_label)
Exemple #6
0
 def doCensus(self, page=False):
     """Recount all words and characters of the document.

     Empties IMC.wordCensus and IMC.charCensus and then scans the text
     blocks of self.document() in order:

     * Lines that match reLineSep are page separators and contribute no
       counts. If ``page`` is true, the file number and proofer string
       captured by reLineSep are stored in IMC.pageTable with a
       QTextCursor set to the block's start position.
     * All other lines have each character tallied. Each token found by
       reTokens is then handled either as HTML markup (it starts with
       qcLess) or as a word that is counted with its property flags.

     An HTML open tag with a lang attribute (as matched by reLang) makes
     "/<dict>" the alternate-dictionary suffix appended to subsequent
     words; a close tag whose name equals the saved open-tag name clears
     it again.

     If the book type starts with "htm" and the first line starts with
     "<!DOCTYPE", lines ahead of the first one starting with "<body" are
     not scanned.

     Afterwards IMC.needSpellCheck is set to True, IMC.staleCensus to 0,
     and IMC.wordlistsChanged is OR-ed into IMC.needMetadataSave.

     :param page: when true, page-separator lines are also loaded into
         IMC.pageTable (the Open-with-no-.meta case).
     """
     global reLineSep, reTokens, reLang, qcLess
     # Clear the current census values
     IMC.wordCensus.clear()
     IMC.charCensus.clear()
     # Count chars locally for speed
     local_char_census = defaultdict(int)
     # Name of current alternate dictionary
     alt_dict = QString()  # isEmpty when none
     # Tag from which we set an alternate dict
     alt_dict_tag = QString()
     # Start the progress bar based on the number of lines in the document
     pqMsgs.startBar(self.document().blockCount(),
                     "Counting words and chars...")
     # Find the first text block of interest, skipping an HTML header file
     qtb = self.document().begin()  # first text block
     if (IMC.bookType.startsWith(QString(u"htm"))
             and qtb.text().startsWith(QString(u"<!DOCTYPE"))):
         while (qtb != self.document().end()
                and not qtb.text().startsWith(QString(u"<body"))):
             qtb = qtb.next()
     # Scan all lines of the document to the end.
     while qtb != self.document().end():
         qsLine = qtb.text()  # text of line as qstring
         if reLineSep.exactMatch(qsLine):  # this is a page separator line
             if page:
                 # We are doing page seps, it's for Open with no .meta seen,
                 # the page table has been cleared. Store the page sep
                 # data in the page table, with a textCursor to its start.
                 qsfilenum = reLineSep.cap(1)  # xxx from "File: xxx.png"
                 qsproofers = reLineSep.cap(2)  # \who\x\blah\etc
                 # proofer names can contain spaces, replace with en-space
                 # (QString.replace changes qsproofers in place)
                 qsproofers.replace(QChar(" "), QChar(0x2002))
                 # create a new TextCursor instance
                 tcursor = QTextCursor(self.document())
                 # point it to this text block
                 tcursor.setPosition(qtb.position())
                 # dump all that in the page table
                 IMC.pageTable.loadPsep(tcursor, qsfilenum, qsproofers)
             # else not doing pages, just ignore this psep line
         else:  # not psep, ordinary text line, count chars and words
             pyLine = unicode(qsLine)  # move into Python space to count
             for c in pyLine:
                 local_char_census[c] += 1
             j = 0
             while True:
                 # The cap() calls below read the captures of this search,
                 # so they have to run before the next indexIn.
                 j = reTokens.indexIn(qsLine, j)
                 if j < 0:  # no more word-like units
                     break
                 qsWord = reTokens.cap(0)
                 j += qsWord.size()
                 if qsWord.startsWith(qcLess):
                     # Examine a captured HTML production.
                     if not reTokens.cap(2).isEmpty():
                         # HTML open tag, look for lang='dict'
                         if 0 <= reLang.indexIn(reTokens.cap(3)):
                             # found it: save tag and dict name
                             alt_dict_tag = QString(reTokens.cap(2))
                             alt_dict = QString(reLang.cap(1))
                             alt_dict.prepend(u'/')  # make "/en_GB"
                         # else no lang= attribute
                     else:
                         # HTML close tag, see if it closes alt dict use
                         if reTokens.cap(5) == alt_dict_tag:
                             # yes, matches open-tag for dict, clear it
                             alt_dict_tag = QString()
                             alt_dict = QString()
                         # else no alt dict in use, or didn't match
                 else:  # did not start with "<", process as a word
                     # Set the property flags, which is harder now we don't
                     # look at every character. Use the QString facilities
                     # rather than python because python .isalnum fails
                     # for a hyphenated number "1850-1910".
                     flag = 0
                     if 0 != qsWord.compare(qsWord.toLower()):
                         flag |= IMC.WordHasUpper
                     if 0 != qsWord.compare(qsWord.toUpper()):
                         flag |= IMC.WordHasLower
                     if qsWord.contains(qcHyphen):
                         flag |= IMC.WordHasHyphen
                     if (qsWord.contains(qcApostrophe)
                             or qsWord.contains(qcCurlyApostrophe)):
                         flag |= IMC.WordHasApostrophe
                     if qsWord.contains(reDigit):
                         flag |= IMC.WordHasDigit
                     # The word is counted with the (possibly empty)
                     # "/dict" suffix appended.
                     IMC.wordCensus.count(qsWord.append(alt_dict), flag)
             # end "while any more words in this line"
         # end of not-a-psep-line processing
         qtb = qtb.next()  # move on to next block
         if 0 == (qtb.blockNumber() & 255):  # every 256th block
             pqMsgs.rollBar(qtb.blockNumber())  # roll the bar
             QApplication.processEvents()
     # end of scanning all text blocks in the doc
     pqMsgs.endBar()
     # We accumulated the char counts in local_char_census. Now read it out
     # in sorted order and stick it in the IMC.charCensus list.
     for one_char in sorted(local_char_census.keys()):
         qc = QChar(ord(one_char))  # get to QChar for category() method
         IMC.charCensus.append(QString(qc), local_char_census[one_char],
                               qc.category())
     IMC.needSpellCheck = True  # after a census this is true
     IMC.staleCensus = 0  # but this is no longer true
     IMC.needMetadataSave |= IMC.wordlistsChanged