Exemple #1
 def areaWidth(self):
     """Return the width needed to show the editor's line numbers.

     The result is a fixed margin of 10 plus the width of the glyph '9'
     (taken from the editor's font metrics) multiplied by the number of
     digits to reserve.  At least three digits are always reserved.

     The digit count is the real number of decimal digits in the block
     count.  The earlier loop divided by 1000 while adding only one digit
     per step, so documents with 10000 or more blocks were given too few
     digit slots.
     """
     block_total = max(1, self.editor.blockCount())
     # Reserve room for three digits even in short documents.
     digits = max(3, len(str(block_total)))
     return 10 + self.editor.fontMetrics().width(QChar('9')) * digits
Exemple #2
    def getRecipeNumber(self, record):
        # Produce the recipe number for the current person/client.
        #
        # Visible flow: look up the bookkeeper code for self.personId, ask
        # getDocumentNumber() for a counter value for self.clientId under
        # that code and, when the value contains '#', split it into a series
        # part and a number part, optionally left-pad the number with zeros
        # to 7 characters, and join the two parts with '#' again.  A truthy
        # counter value is returned to the caller.
        #
        # `record` is not used anywhere in the visible body.
        #
        # NOTE(review): this listing appears to have lost lines -- both
        # `raise Exception(` calls and the `getDocumentNumber(` call are
        # never closed.  Restore them from the original before relying on
        # this code.
        # TODO: mdldml: rewrite this method, it is awful
        counterCode = getBookkeeperCode(self.personId)
        if not counterCode:
            # Message (Russian), roughly: "The department's bookkeeping code
            # is not set. It must be set on the current department or on one
            # of its parent departments. The current department is the one
            # the doctor belongs to."
            raise Exception(
                u'Не указан бухгалтерский код подразделения. Бухгалтерский код необходимо указывать у текущего подразделения или у одного из его головных подразделений. Текущим считается подразделение, которому принадлежит врач.'
        counterValue = None
        if counterCode:  # FIXME: Why??
            # NOTE(review): the argument list stops after counterCode; any
            # further arguments and the closing parenthesis are not visible.
            counterValue = getDocumentNumber(
                self.clientId, counterCode,
            if counterValue and QString(counterValue).contains('#'):
                # Text before the first '#' is the series, text between the
                # first and second '#' is the number.
                seria = QString(counterValue).section('#', 0, 0)
                number = QString(counterValue).section('#', 1, 1)
                if QtGui.qApp.justifyDrugRecipeNumber():
                    # Right-justify the number to width 7, padding with '0'.
                    number = number.rightJustified(7, QChar('0'))
                counterValue = seria + '#' + number

        if counterValue:
            return counterValue
            # NOTE(review): as written this raise follows a return at the
            # same indentation and can never run; presumably an `else:` was
            # lost here -- confirm against the original.
            # Message (Russian), roughly: "No counter was found whose code
            # matches the bookkeeping code of the current or a parent
            # department."
            raise Exception(
                u'Не найден счётчик с кодом, совпадающим с кодом для бухгалтерии текущего либо головного подразделения'
Exemple #3
 def about(self):
     """Show the "About GuloMail" dialog.

     The dialog text is built from three pieces in order: the version
     line, a one-sentence description of the program, and the credits
     with the project URL.
     """
     text = QString("GuloMail v0.1\n\n")
     # The description used to be a dangling string literal (its append
     # call was missing), which broke the method and dropped the text.
     text.append(
         "GuloMail is a freeware email client written in Python using PyQt4 (Python bindings for Nokia's Qt)\n\n"
     )
     text.append("GulonSoft 2010\nhttp://www.gulon.co.uk/")
     QMessageBox.about(self, "About GuloMail", text)
Exemple #4
 def lineNumberAreaWidth(self):
     """Return the width needed to display the largest line number.

     The width is a margin of 3 plus the width of the glyph '9' in the
     widget's font, multiplied by the number of decimal digits in the
     block count (a count below 1 is treated as 1).
     """
     block_total = max(1, self.blockCount())
     # A positive integer has as many decimal digits as characters in
     # its string form.
     digit_count = len(str(block_total))
     nine_width = self.fontMetrics().width(QChar('9'))
     return 3 + nine_width * digit_count
Exemple #5
 def paint(self, painter, option, index):
     """Paint one item, adjusting the style option by the item's kind.

     The kind is the string stored under Qt.AccessibleDescriptionRole:

     "parent" -- the State_Enabled flag is OR-ed into option.state.
     "child"  -- the item rectangle's left edge is moved right by the
                 width of four spaces and text eliding is switched off.

     Any other kind leaves the option untouched.  The drawing itself is
     done by a freshly created QItemDelegate.
     """
     kind = index.data(Qt.AccessibleDescriptionRole).toString()
     if kind == "child":
         four_spaces = QString(4, QChar(' '))
         left_shift = option.fontMetrics.width(four_spaces)
         option.rect.adjust(left_shift, 0, 0, 0)
         option.textElideMode = Qt.ElideNone
     elif kind == "parent":
         option.state |= QStyle.State_Enabled
     QItemDelegate().paint(painter, option, index)
Exemple #6
 def keyPressEvent(self, event):
     # Handle two kinds of key press on the table and pass everything that
     # was not accepted to the superclass:
     #   * Ctrl-C: gather the display text of the selected cells,
     #     separated by single spaces.
     #   * A key whose 0x01000000 bit is clear, pressed with no modifier or
     #     with Shift only, while the table is sorted ascending on column 0:
     #     binary-search the sort proxy for the first row whose leading
     #     character is not less than the typed character.
     #
     # NOTE(review): this listing appears to have lost lines.  Nothing
     # visible uses `ans` after it is built, nothing uses the search result
     # (`lo`/`hi`), and the event is never accepted, so as shown every key
     # press would fall through to the superclass.  Confirm against the
     # original source.
     key = int(event.key())
     mods = int(event.modifiers())
     if (key == Qt.Key_C) and (mods & Qt.ControlModifier) :
         # PyQt4's implementation of QTableView::selectedIndexes() does
         # not return a QList but rather a Python list of indices.
         lix = self.selectedIndexes()
         if len(lix) : # non-zero selection
             ans = QString()
             for ix in lix :
                     # NOTE(review): this expression is over-indented and
                     # its value is discarded; presumably it was the
                     # argument of an `ans.append(` call that is missing.
                     self.model().data(ix, Qt.DisplayRole).toString()
                 ans.append(u' ')
             ans.chop(1) # drop final space
     elif (0 == key & 0x01000000) and \
          ( (mods == Qt.NoModifier) or (mods == Qt.ShiftModifier) ) and \
          ( 0 == self.horizontalHeader().sortIndicatorSection()) and \
          ( Qt.AscendingOrder == self.horizontalHeader().sortIndicatorOrder() ):
         # An ordinary data key with or without shift, and the table
         # is sorted on column 0, the words, and sorted ascending.
         # (`&` binds tighter than `==`, so the first test reads
         # 0 == (key & 0x01000000).)
         sortProxy = self.panelRef.proxy
         # rc is the state of the panel's case switch: when it is checked
         # an unshifted key is lowered, when it is unchecked each row's
         # leading character is upper-cased before comparing.
         rc = self.panelRef.caseSwitch.isChecked()
         qc = QChar(key)
         if rc and (mods == Qt.NoModifier) :
             qc = qc.toLower()
         # Lower-bound binary search over the proxy rows [lo, hi).
         hi = sortProxy.rowCount()
         lo = 0
         while (lo < hi) :
             mid = (lo + hi) // 2
             cc = sortProxy.data(sortProxy.index(mid,0)).toString().at(0)
             if not rc : cc = cc.toUpper()
             if qc > cc :
                 lo = mid + 1
             else :
                 hi = mid
     if not event.isAccepted() : # if we didn't handle it, pass it up
         super(myTableView, self).keyPressEvent(event)
Exemple #7
 def loadPsep(self, tc, qsFile, qs_proofers):
     """Build the row for one page separator and advance the value counter.

     tc          -- stored unchanged as the first element of the row
     qsFile      -- wrapped in a QString as the second element
     qs_proofers -- QString of proofer names, each preceded by a backslash

     NOTE(review): the row built here is neither stored nor returned in
     the visible code -- confirm against the original source.
     """
     # The proofer string is converted while it is still a QString.  If it
     # were split on backslash in Python, a sequence such as \b could be
     # read as an escape (\b is backspace, \x08) and corrupt a name.  The
     # leading backslash is dropped first so the list has no empty first
     # item, then the remaining backslashes become commas.
     comma_names = qs_proofers.remove(0, 1).replace(QChar(u'\\'), QChar(u','))
     proofers = unicode(comma_names).split(',')  # list of names
     new_row = [tc, QString(qsFile), proofers, 0, 0, 0, 0]
     first_row = not self.size()
     if first_row:
         # Empty table: the first row sets an explicit Arabic folio.
         folio_format, folio_rule = IMC.FolioFormatArabic, IMC.FolioRuleSet
     else:
         # Later rows repeat the previous format and add one.
         folio_format, folio_rule = IMC.FolioFormatSame, IMC.FolioRuleAdd1
     new_row[self._Fofor] = folio_format
     new_row[self._Foact] = folio_rule
     if first_row:
         self._explict_formats.add(0)  # the set starts with one member
     slot = self._last_value
     self._last_value = slot + 1
     self.setValue(slot, self._last_value)  # also updates display value
Exemple #8
 def metaStringIn(self, qline):
     """Rebuild one page-table row from a space-separated metadata line.

     After splitting qline on single spaces the fields are used by
     position: 1 is the file name, 2 the backslash-delimited proofer
     string, 3 the folio rule, 4 the folio format and 5 the value handed
     to setValue().  Field 0 is not read.

     NOTE(review): the row built here is neither stored nor returned in
     the visible code, and a format that differs from the last one is
     recorded as FolioFormatSame; this looks like lines (perhaps an
     `else:`) were lost from the listing -- confirm against the original.
     """
     fields = unicode(qline).split(' ')
     tc = QTextCursor(IMC.editWidget.document())
     # Drop the leading backslash and turn the others into commas while
     # the text is still a QString, then split the names in Python.
     proofer_qs = QString(fields[2])
     proofer_qs = proofer_qs.remove(0, 1).replace(QChar(u'\\'), QChar(u','))
     proofers = unicode(proofer_qs).split(',')
     row = [tc, QString(fields[1]), proofers, 0, 0, 0, 0]
     index = self.size()  # index of row to be
     row[self._Foact] = int(fields[3])
     parsed_format = int(fields[4])
     if parsed_format == self._last_format:
         row_format = parsed_format
     else:
         # Either row 0 (self._last_format is None) or a change of format:
         # remember the new format.
         self._last_format = parsed_format
         row_format = IMC.FolioFormatSame
     row[self._Fofor] = row_format
     self.setValue(index, int(fields[5]))  # also updates display
Exemple #9
 def refresh(self, clearCache=False):
     # Reload the preview from the current editor text.
     #
     # Visible steps: derive the base URL for images from IMC.bookDirPath,
     # remember the web page's current scroll position, then hand the
     # editor's plain text to the preview widget as HTML.
     #
     # clearCache -- tested below, but the statements it guards are not
     #               visible in this listing.
     #
     # NOTE(review): both `if` statements in this method have no body, so
     # the method does not compile as shown; lines appear to be missing.
     # this could be first refresh for this book file, so set the
     # base URL for its images.
     sep = QChar(u'/')
     qsp = QString(IMC.bookDirPath)
     # NOTE(review): presumably the missing body appended `sep` to `qsp`
     # so the directory path ends with a slash -- confirm.
     if not qsp.endsWith(sep):
     self.baseURL = QUrl.fromLocalFile(qsp)
     # this might be the second or nth refresh of the book, note the
     # scroll position so we can restore it in loadEnds below. This
     # means that when you make a little edit at the end of a book, and
     # refresh the preview, you won't have to scroll down to the end
     # for the 500th time to see your changes.
     self.scrollPosition = self.webPage.mainFrame().scrollPosition()
     # NOTE(review): the cache-clearing statement(s) guarded by this test
     # are missing, as is whatever the next comment introduced.
     if clearCache:
     # We are reloading our base page, so clear any history of prior links
     self.preview.setHtml(IMC.editWidget.toPlainText(), self.baseURL)
Exemple #10
    def testQgsFontMarkerSymbolLayerV2(self):
        # Test that a font marker symbol layer created through
        # QgsFontMarkerSymbolLayerV2.createFromSld() has the expected type,
        # font family ('Arial'), character (code point 0x4d), size (6.23)
        # and angle (3).
        #
        # NOTE(review): this listing appears to have lost lines.  The next
        # line is bare prose (presumably a docstring whose quotes are
        # missing), and the QDir.toNativeSeparators( and createFromSld(
        # calls are never closed, so the SLD file path and the element
        # passed to createFromSld are not visible.
        Create a new style from a .sld file and match test
        mTestName = QString('QgsFontMarkerSymbolLayerV2')
        mFilePath = QDir.toNativeSeparators(

        # Load the SLD file into a DOM document; the second argument of
        # setContent is passed as True.
        mDoc = QDomDocument(mTestName)
        mFile = QFile(mFilePath)
        mDoc.setContent(mFile, True)
        mSymbolLayer = QgsFontMarkerSymbolLayerV2.createFromSld(

        # The layer must be of the same type as a default-constructed one.
        mExpectedValue = type(QgsFontMarkerSymbolLayerV2())
        mValue = type(mSymbolLayer)
        mMessage = 'Expected "%s" got "%s"' % (mExpectedValue, mValue)
        assert mExpectedValue == mValue, mMessage

        # Font family.
        mExpectedValue = QString(u'Arial')
        mValue = mSymbolLayer.fontFamily()
        mMessage = 'Expected "%s" got "%s"' % (mExpectedValue, mValue)
        assert mExpectedValue == mValue, mMessage

        # Marker character.
        mExpectedValue = QChar(0x4d)
        mValue = mSymbolLayer.character()
        mMessage = 'Expected "%s" got "%s"' % (mExpectedValue, mValue)
        assert mExpectedValue == mValue, mMessage

        # Size; note this is an exact floating-point comparison.
        mExpectedValue = 6.23
        mValue = mSymbolLayer.size()
        mMessage = 'Expected "%s" got "%s"' % (mExpectedValue, mValue)
        assert mExpectedValue == mValue, mMessage

        # Rotation angle.
        mExpectedValue = 3
        mValue = mSymbolLayer.angle()
        mMessage = 'Expected "%s" got "%s"' % (mExpectedValue, mValue)
        assert mExpectedValue == mValue, mMessage
Exemple #11
    def generateFontSymbol(self, mapbasicString, name):
        # NOTE(review): this listing appears to have lost lines.  The
        # docstring below is never closed, the slice expression assigned to
        # `tokens` stops after a `+`, and the `dict(` call is never closed,
        # so the full set of template values is not visible.  `name` is not
        # used in the visible body.  Restore from the original source.
        """ Generates the qml symbol block from a MapInfo font symbol string.

        mapbasicString -- The mapbasic symbol string.
        name -- the name of the symbol for qgis, normally 1..n

        Return -- A qml string based on closest match to
        the givin Mapbasic string.
        # MAPBASIC Font Symbol syntax:
        # Symbol ( shape, color, size, fontname, fontstyle, rotation )

        fontTemplate = templateLookup['symbolFont']

        # The tokens are taken from the text that follows the first '('.
        tokens = mapbasicString[mapbasicString.index('(') +
        rgb = self.colorToRGB(tokens[1])
        rgbString = "%s,%s,%s" % (rgb[0], rgb[1], rgb[2])
        # tokens[0] is the numeric character code of the symbol shape.
        shape = QChar(int(tokens[0])).toAscii()
        # Handle escaping special XML tokens.
        if shape == '<':
            shape = '&lt;'
        elif shape == '&':
            shape = '&amp;'
        elif shape == '"':
            shape = '&quot;'
        values = dict(
            color=rgbString,  # Color needs to be converted to RGB
            size=self.pointTomm(tokens[2]),  # Mapasic size 3 points == 1 mm
            angle=int(tokens[5]) / 180,  # MapInfo rotation is back to front.

        # Generate the xml for a font marker
        return fontTemplate.safe_substitute(values)
Exemple #12
    def __init__(self):
        # Set up the shared application state: status flags and their bit
        # values, document properties, references to major objects (filled
        # in later by other modules), key-combination constants and lookup
        # lists, the HTML named-entity table, and version/platform info.
        #
        # NOTE(review): this listing appears to have lost lines in three
        # places, each marked below: the keysOfInterest list is never
        # closed, the namedEntityDict literal has no closing brace, and the
        # `try:` lines of the platform detection are missing.

        # Controls on the edit syntax hiliter, queried in the editor and
        # set by the Main window View menu actions:
        self.scannoHiliteSwitch = False
        self.spellingHiliteSwitch = False

        # Document/file status variables: more than a simple "dirty" flag.
        # Just a brute count of calls to the textChanged signal slot.
        self.editCounter = 0
        # needSpellCheck when a word census has been done, or when a
        # different main dictionary is selected. Cleared when a spellcheck is
        # done, e.g. from the Refresh button in the Word panel.
        self.needSpellCheck = False
        # Stale census has two bits:
        self.staleCensusLoaded = 0x02 # census was stale as of File>Load
        self.staleCensusAcquired = 0x01 # doc changed since last census
        self.staleCensus = 0x00
        # needMetadataSave is a bit set of:
        self.goodwordsChanged = 0x01
        self.notePanelChanged = 0x02
        self.pagePanelChanged = 0x04
        self.bookmarksChanged = 0x08
        self.wordlistsChanged = 0x10
        self.propertyChanged = 0x20
        # a word is added to goodwords, or we do a spellcheck or a word census.
        # Cleared on Save, Save-As or New.
        self.needMetadataSave = 0x00
        # Note that the equivalent switch "needDocumentSave" is kept by the
        # QTextDocument as IMC.editWidget.document().isModified() -- we rely on
        # that because it tracks the undo/redo actions and knows if the user
        # has backed out all changes or not.

        # Other document properties read/written in metadata
        self.bookSaveEncoding = None # encoding for saves: UTF-8 or ISO-8859-1
        self.documentHash = b'' # hash of document contents
        self.metaHash = b'' # hash as read from the .meta file
        self.bookMainDict = None # QString of preferred main dict for this book

        # These values are used in forming the word classification
        # flag for words in the word census. Prepared in the census
        # in pqEdit, referenced in pqWord.
        self.WordHasUpper = 0x01
        self.WordHasLower = 0x02
        self.WordHasDigit = 0x04
        self.WordHasHyphen = 0x08
        self.WordHasApostrophe = 0x10
        self.WordMisspelt = 0x80

        # These values are used to encode folio controls for the
        # Page/folio table. Initialized in pqEdit when opening a new file,
        # used in pqWord and (eventually) in html conversion.
        self.FolioFormatArabic = 0x00
        self.FolioFormatUCRom = 0x01
        self.FolioFormatLCRom = 0x02
        self.FolioFormatSame = 0x03 # the "ditto" format
        self.FolioRuleAdd1 = 0x00
        self.FolioRuleSet = 0x01
        self.FolioRuleSkip = 0x02

        # Pointers initialized in ppqt, filled in in pqEdit,
        # and referenced everywhere else
        self.settings = None # QSettings for global save/restore app values
        self.appBasePath = None # path to our dist folder for extras, dict, fonts
        self.dictPath = None # path to folder where we look for dictionaries
        self.scannoList = None # list loaded from a scannos file for hiliting
        self.goodWordList = None # good words
        self.badWordList = None # bad words
        self.wordCensus = None # census of words (tokens actually)
        self.charCensus = None # census of characters
        self.pageTable = None # list of page separators
        self.currentImageNumber = None # qstring e.g. "002" of png file
        self.editWidget = None # main QPlainTextEdit set up in pqMain
        self.spellCheck = None # spellcheck object from pqSpell
        self.mainWindow = None # ref to main window

        # Pointers initialized in pqMain to various major objects
        self.bookPath = None # absolute path to book file
        self.bookDirPath = None # absolute path to book directory
        self.bookType = None # book file suffix used to detect .hmt(l)
        self.defaultFontFamily = None # name of preferred font, usually 'Liberation Mono'
        self.fontFamily = None # last-chosen font
        self.fontSize = 12 # last-chosen font size in Edit panel
        self.pngPanel = None # ref to Pngs panel
        self.notesEditor = None # ref to Notes panel
        self.findPanel = None # ref to Find panel
        self.statusBar = None # ref to status bar of main window
        self.progressBar = None # ref to progress bar in status bar

        # constant value for the line-delimiter used by QPlainTextEdit
        self.QtLineDelim = QChar(0x2029)
        # constant value for the zero-width-non-joiner used as a marker in reflow
        self.ZWNJ = QChar(0x200C)

        # Keystrokes checked by editor and other panels that monitor KeyEvent signals.
        # In rough order of frequency of use, we support:
        # ^g and ^G, search again forward/backward,
        # ^f start search,
        # ^t and ^T replace and search forward/backward,
        # ^1-9 bookmarks
        # ^F start search with selection
        # ^= replace,
        # ^-alt-1-9 set bookmarks
        # ^+ and ^- for zoom in/out
        # ctl-plus can also appear as ctl-shft-equal and ctl-shft-plus!
        # ^l and ^-alt-l, ^p and ^-alt-p for the Notes panel
        # Define these in a way easy to check in a keyEvent slot, and also put
        # them in python lists for quick lookup.
        # Each constant below is a key code OR-ed with its modifier flag(s).
        # keypadDeModifier is a 32-bit all-ones mask with the KeypadModifier
        # bit cleared.
        self.keypadDeModifier = int(0xffffffff ^ Qt.KeypadModifier)
        self.ctl_G = Qt.ControlModifier | Qt.Key_G
        self.ctl_shft_G = Qt.ShiftModifier | self.ctl_G
        self.ctl_F = Qt.ControlModifier | Qt.Key_F
        self.ctl_shft_F = Qt.ShiftModifier | self.ctl_F
        self.ctl_T = Qt.ControlModifier | Qt.Key_T
        self.ctl_shft_T = Qt.ShiftModifier | self.ctl_T
        self.ctl_1 = Qt.ControlModifier | Qt.Key_1
        self.ctl_2 = Qt.ControlModifier | Qt.Key_2
        self.ctl_3 = Qt.ControlModifier | Qt.Key_3
        self.ctl_4 = Qt.ControlModifier | Qt.Key_4
        self.ctl_5 = Qt.ControlModifier | Qt.Key_5
        self.ctl_6 = Qt.ControlModifier | Qt.Key_6
        self.ctl_7 = Qt.ControlModifier | Qt.Key_7
        self.ctl_8 = Qt.ControlModifier | Qt.Key_8
        self.ctl_9 = Qt.ControlModifier | Qt.Key_9
        self.ctl_shft_1 = Qt.ShiftModifier | self.ctl_1
        self.ctl_shft_2 = Qt.ShiftModifier | self.ctl_2
        self.ctl_shft_3 = Qt.ShiftModifier | self.ctl_3
        self.ctl_shft_4 = Qt.ShiftModifier | self.ctl_4
        self.ctl_shft_5 = Qt.ShiftModifier | self.ctl_5
        self.ctl_shft_6 = Qt.ShiftModifier | self.ctl_6
        self.ctl_shft_7 = Qt.ShiftModifier | self.ctl_7
        self.ctl_shft_8 = Qt.ShiftModifier | self.ctl_8
        self.ctl_shft_9 = Qt.ShiftModifier | self.ctl_9
        self.ctl_alt_1 = Qt.AltModifier | self.ctl_1
        self.ctl_alt_2 = Qt.AltModifier | self.ctl_2
        self.ctl_alt_3 = Qt.AltModifier | self.ctl_3
        self.ctl_alt_4 = Qt.AltModifier | self.ctl_4
        self.ctl_alt_5 = Qt.AltModifier | self.ctl_5
        self.ctl_alt_6 = Qt.AltModifier | self.ctl_6
        self.ctl_alt_7 = Qt.AltModifier | self.ctl_7
        self.ctl_alt_8 = Qt.AltModifier | self.ctl_8
        self.ctl_alt_9 = Qt.AltModifier | self.ctl_9
        self.ctl_minus = Qt.ControlModifier | Qt.Key_Minus
        self.ctl_equal = Qt.ControlModifier | Qt.Key_Equal
        self.ctl_plus = Qt.ControlModifier | Qt.Key_Plus
        self.ctl_shft_equal = Qt.ShiftModifier | self.ctl_equal
        self.ctl_shft_plus = Qt.ShiftModifier | self.ctl_plus
        self.ctl_M = Qt.ControlModifier | Qt.Key_M
        self.ctl_alt_M = Qt.AltModifier | self.ctl_M
        self.ctl_L = Qt.ControlModifier | Qt.Key_L
        self.ctl_alt_L = Qt.AltModifier | self.ctl_L
        self.ctl_P = Qt.ControlModifier | Qt.Key_P
        self.ctl_alt_P = Qt.AltModifier | self.ctl_P
        self.ctl_Left = Qt.ControlModifier | Qt.Key_Left
        self.ctl_left_pad = self.ctl_Left | Qt.KeypadModifier
        self.ctl_LBracket = Qt.ControlModifier | Qt.Key_BracketLeft
        self.ctl_B = Qt.ControlModifier | Qt.Key_B
        # NOTE(review): this list literal is never closed in the listing;
        # its remaining members and the closing bracket are not visible.
        self.keysOfInterest = [
                self.ctl_G, self.ctl_shft_G, self.ctl_F, self.ctl_shft_F,
                self.ctl_T, self.ctl_shft_T, self.ctl_equal,
                self.ctl_1, self.ctl_2, self.ctl_3, self.ctl_4, self.ctl_5,
                self.ctl_6, self.ctl_7, self.ctl_8, self.ctl_9,
                self.ctl_shft_1, self.ctl_shft_2, self.ctl_shft_3,
                self.ctl_shft_4, self.ctl_shft_5, self.ctl_shft_6,
                self.ctl_shft_7, self.ctl_shft_8, self.ctl_shft_9,
                self.ctl_alt_1, self.ctl_alt_2, self.ctl_alt_3,
                self.ctl_alt_4,  self.ctl_alt_5,  self.ctl_alt_6,
                self.ctl_alt_7, self.ctl_alt_8,  self.ctl_alt_9,
                self.ctl_minus, self.ctl_plus, self.ctl_shft_equal,
        # Per-purpose groups of the key constants defined above.
        self.backKeys = [self.ctl_B, self.ctl_Left, self.ctl_LBracket, self.ctl_left_pad]
        self.zoomKeys = [self.ctl_minus, self.ctl_plus,
                         self.ctl_shft_equal, self.ctl_shft_plus]
        self.findKeys = [self.ctl_G, self.ctl_shft_G, self.ctl_F, self.ctl_shft_F,
                        self.ctl_T, self.ctl_equal, self.ctl_shft_T]
        self.markKeys = [self.ctl_1, self.ctl_2, self.ctl_3, self.ctl_4, self.ctl_5,
                self.ctl_6, self.ctl_7, self.ctl_8, self.ctl_9 ]
        self.markSetKeys = [self.ctl_alt_1, self.ctl_alt_2, self.ctl_alt_3,
                self.ctl_alt_4,  self.ctl_alt_5,  self.ctl_alt_6,  self.ctl_alt_7,
                self.ctl_alt_8,  self.ctl_alt_9 ]
        self.markShiftKeys = [self.ctl_shft_1, self.ctl_shft_2, self.ctl_shft_3,
                self.ctl_shft_4, self.ctl_shft_5, self.ctl_shft_6,
                self.ctl_shft_7, self.ctl_shft_8, self.ctl_shft_9 ]

        # A list of the 252 Named Entities of HTML 4. The names are indexed
        # by the unicode characters they translate. To complete an entity
        # prepend & and append ;, thus quot -> &quot; (This list was lifted from
        # http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
        # and processed into this form using regex changes in BBEdit.)
        self.namedEntityDict = {
u'\u0022' : u'quot', # quotation mark (= APL quote)
u'\u0026' : u'amp', # ampersand
u'\u0027' : u'apos', # apostrophe (= apostrophe-quote); see below
u'\u003C' : u'lt', # less-than sign
u'\u003E' : u'gt', # greater-than sign
u'\u00A0' : u'nbsp', # no-break space (= non-breaking space)[d]
u'\u00A1' : u'iexcl', # inverted exclamation mark
u'\u00A2' : u'cent', # cent sign
u'\u00A3' : u'pound', # pound sign
u'\u00A4' : u'curren', # currency sign
u'\u00A5' : u'yen', # yen sign (= yuan sign)
u'\u00A6' : u'brvbar', # broken bar (= broken vertical bar)
u'\u00A7' : u'sect', # section sign
u'\u00A8' : u'uml', # diaeresis (= spacing diaeresis); see Germanic umlaut
u'\u00A9' : u'copy', # copyright symbol
u'\u00AA' : u'ordf', # feminine ordinal indicator
u'\u00AB' : u'laquo', # left-pointing double angle quotation mark (= left pointing guillemet)
u'\u00AC' : u'not', # not sign
u'\u00AD' : u'shy', # soft hyphen (= discretionary hyphen)
u'\u00AE' : u'reg', # registered sign ( = registered trademark symbol)
u'\u00AF' : u'macr', # macron (= spacing macron = overline = APL overbar)
u'\u00B0' : u'deg', # degree symbol
u'\u00B1' : u'plusmn', # plus-minus sign (= plus-or-minus sign)
u'\u00B2' : u'sup2', # superscript two (= superscript digit two = squared)
u'\u00B3' : u'sup3', # superscript three (= superscript digit three = cubed)
u'\u00B4' : u'acute', # acute accent (= spacing acute)
u'\u00B5' : u'micro', # micro sign
u'\u00B6' : u'para', # pilcrow sign ( = paragraph sign)
u'\u00B7' : u'middot', # middle dot (= Georgian comma = Greek middle dot)
u'\u00B8' : u'cedil', # cedilla (= spacing cedilla)
u'\u00B9' : u'sup1', # superscript one (= superscript digit one)
u'\u00BA' : u'ordm', # masculine ordinal indicator
u'\u00BB' : u'raquo', # right-pointing double angle quotation mark (= right pointing guillemet)
u'\u00BC' : u'frac14', # vulgar fraction one quarter (= fraction one quarter)
u'\u00BD' : u'frac12', # vulgar fraction one half (= fraction one half)
u'\u00BE' : u'frac34', # vulgar fraction three quarters (= fraction three quarters)
u'\u00BF' : u'iquest', # inverted question mark (= turned question mark)
u'\u00C0' : u'Agrave', # Latin capital letter A with grave accent (= Latin capital letter A grave)
u'\u00C1' : u'Aacute', # Latin capital letter A with acute accent
u'\u00C2' : u'Acirc', # Latin capital letter A with circumflex
u'\u00C3' : u'Atilde', # Latin capital letter A with tilde
u'\u00C4' : u'Auml', # Latin capital letter A with diaeresis
u'\u00C5' : u'Aring', # Latin capital letter A with ring above (= Latin capital letter A ring)
u'\u00C6' : u'AElig', # Latin capital letter AE (= Latin capital ligature AE)
u'\u00C7' : u'Ccedil', # Latin capital letter C with cedilla
u'\u00C8' : u'Egrave', # Latin capital letter E with grave accent
u'\u00C9' : u'Eacute', # Latin capital letter E with acute accent
u'\u00CA' : u'Ecirc', # Latin capital letter E with circumflex
u'\u00CB' : u'Euml', # Latin capital letter E with diaeresis
u'\u00CC' : u'Igrave', # Latin capital letter I with grave accent
u'\u00CD' : u'Iacute', # Latin capital letter I with acute accent
u'\u00CE' : u'Icirc', # Latin capital letter I with circumflex
u'\u00CF' : u'Iuml', # Latin capital letter I with diaeresis
u'\u00D0' : u'ETH', # Latin capital letter Eth
u'\u00D1' : u'Ntilde', # Latin capital letter N with tilde
u'\u00D2' : u'Ograve', # Latin capital letter O with grave accent
u'\u00D3' : u'Oacute', # Latin capital letter O with acute accent
u'\u00D4' : u'Ocirc', # Latin capital letter O with circumflex
u'\u00D5' : u'Otilde', # Latin capital letter O with tilde
u'\u00D6' : u'Ouml', # Latin capital letter O with diaeresis
u'\u00D7' : u'times', # multiplication sign
u'\u00D8' : u'Oslash', # Latin capital letter O with stroke (= Latin capital letter O slash)
u'\u00D9' : u'Ugrave', # Latin capital letter U with grave accent
u'\u00DA' : u'Uacute', # Latin capital letter U with acute accent
u'\u00DB' : u'Ucirc', # Latin capital letter U with circumflex
u'\u00DC' : u'Uuml', # Latin capital letter U with diaeresis
u'\u00DD' : u'Yacute', # Latin capital letter Y with acute accent
u'\u00DE' : u'THORN', # Latin capital letter THORN
u'\u00DF' : u'szlig', # Latin small letter sharp s (= ess-zed); see German Eszett
u'\u00E0' : u'agrave', # Latin small letter a with grave accent
u'\u00E1' : u'aacute', # Latin small letter a with acute accent
u'\u00E2' : u'acirc', # Latin small letter a with circumflex
u'\u00E3' : u'atilde', # Latin small letter a with tilde
u'\u00E4' : u'auml', # Latin small letter a with diaeresis
u'\u00E5' : u'aring', # Latin small letter a with ring above
u'\u00E6' : u'aelig', # Latin small letter ae (= Latin small ligature ae)
u'\u00E7' : u'ccedil', # Latin small letter c with cedilla
u'\u00E8' : u'egrave', # Latin small letter e with grave accent
u'\u00E9' : u'eacute', # Latin small letter e with acute accent
u'\u00EA' : u'ecirc', # Latin small letter e with circumflex
u'\u00EB' : u'euml', # Latin small letter e with diaeresis
u'\u00EC' : u'igrave', # Latin small letter i with grave accent
u'\u00ED' : u'iacute', # Latin small letter i with acute accent
u'\u00EE' : u'icirc', # Latin small letter i with circumflex
u'\u00EF' : u'iuml', # Latin small letter i with diaeresis
u'\u00F0' : u'eth', # Latin small letter eth
u'\u00F1' : u'ntilde', # Latin small letter n with tilde
u'\u00F2' : u'ograve', # Latin small letter o with grave accent
u'\u00F3' : u'oacute', # Latin small letter o with acute accent
u'\u00F4' : u'ocirc', # Latin small letter o with circumflex
u'\u00F5' : u'otilde', # Latin small letter o with tilde
u'\u00F6' : u'ouml', # Latin small letter o with diaeresis
u'\u00F7' : u'divide', # division sign (= obelus)
u'\u00F8' : u'oslash', # Latin small letter o with stroke (= Latin small letter o slash)
u'\u00F9' : u'ugrave', # Latin small letter u with grave accent
u'\u00FA' : u'uacute', # Latin small letter u with acute accent
u'\u00FB' : u'ucirc', # Latin small letter u with circumflex
u'\u00FC' : u'uuml', # Latin small letter u with diaeresis
u'\u00FD' : u'yacute', # Latin small letter y with acute accent
u'\u00FE' : u'thorn', # Latin small letter thorn
u'\u00FF' : u'yuml', # Latin small letter y with diaeresis
u'\u0152' : u'OElig', # Latin capital ligature oe[e]
u'\u0153' : u'oelig', # Latin small ligature oe[e]
u'\u0160' : u'Scaron', # Latin capital letter s with caron
u'\u0161' : u'scaron', # Latin small letter s with caron
u'\u0178' : u'Yuml', # Latin capital letter y with diaeresis
u'\u0192' : u'fnof', # Latin small letter f with hook (= function = florin)
u'\u02C6' : u'circ', # modifier letter circumflex accent
u'\u02DC' : u'tilde', # small tilde
u'\u0391' : u'Alpha', # Greek capital letter Alpha
u'\u0392' : u'Beta', # Greek capital letter Beta
u'\u0393' : u'Gamma', # Greek capital letter Gamma
u'\u0394' : u'Delta', # Greek capital letter Delta
u'\u0395' : u'Epsilon', # Greek capital letter Epsilon
u'\u0396' : u'Zeta', # Greek capital letter Zeta
u'\u0397' : u'Eta', # Greek capital letter Eta
u'\u0398' : u'Theta', # Greek capital letter Theta
u'\u0399' : u'Iota', # Greek capital letter Iota
u'\u039A' : u'Kappa', # Greek capital letter Kappa
u'\u039B' : u'Lambda', # Greek capital letter Lambda
u'\u039C' : u'Mu', # Greek capital letter Mu
u'\u039D' : u'Nu', # Greek capital letter Nu
u'\u039E' : u'Xi', # Greek capital letter Xi
u'\u039F' : u'Omicron', # Greek capital letter Omicron
u'\u03A0' : u'Pi', # Greek capital letter Pi
u'\u03A1' : u'Rho', # Greek capital letter Rho
u'\u03A3' : u'Sigma', # Greek capital letter Sigma
u'\u03A4' : u'Tau', # Greek capital letter Tau
u'\u03A5' : u'Upsilon', # Greek capital letter Upsilon
u'\u03A6' : u'Phi', # Greek capital letter Phi
u'\u03A7' : u'Chi', # Greek capital letter Chi
u'\u03A8' : u'Psi', # Greek capital letter Psi
u'\u03A9' : u'Omega', # Greek capital letter Omega
u'\u03B1' : u'alpha', # Greek small letter alpha
u'\u03B2' : u'beta', # Greek small letter beta
u'\u03B3' : u'gamma', # Greek small letter gamma
u'\u03B4' : u'delta', # Greek small letter delta
u'\u03B5' : u'epsilon', # Greek small letter epsilon
u'\u03B6' : u'zeta', # Greek small letter zeta
u'\u03B7' : u'eta', # Greek small letter eta
u'\u03B8' : u'theta', # Greek small letter theta
u'\u03B9' : u'iota', # Greek small letter iota
u'\u03BA' : u'kappa', # Greek small letter kappa
u'\u03BB' : u'lambda', # Greek small letter lambda
u'\u03BC' : u'mu', # Greek small letter mu
u'\u03BD' : u'nu', # Greek small letter nu
u'\u03BE' : u'xi', # Greek small letter xi
u'\u03BF' : u'omicron', # Greek small letter omicron
u'\u03C0' : u'pi', # Greek small letter pi
u'\u03C1' : u'rho', # Greek small letter rho
u'\u03C2' : u'sigmaf', # Greek small letter final sigma
u'\u03C3' : u'sigma', # Greek small letter sigma
u'\u03C4' : u'tau', # Greek small letter tau
u'\u03C5' : u'upsilon', # Greek small letter upsilon
u'\u03C6' : u'phi', # Greek small letter phi
u'\u03C7' : u'chi', # Greek small letter chi
u'\u03C8' : u'psi', # Greek small letter psi
u'\u03C9' : u'omega', # Greek small letter omega
u'\u03D1' : u'thetasym', # Greek theta symbol
u'\u03D2' : u'upsih', # Greek Upsilon with hook symbol
u'\u03D6' : u'piv', # Greek pi symbol
u'\u2002' : u'ensp', # en space[d]
u'\u2003' : u'emsp', # em space[d]
u'\u2009' : u'thinsp', # thin space[d]
u'\u200C' : u'zwnj', # zero-width non-joiner
u'\u200D' : u'zwj', # zero-width joiner
u'\u200E' : u'lrm', # left-to-right mark
u'\u200F' : u'rlm', # right-to-left mark
u'\u2013' : u'ndash', # en dash
u'\u2014' : u'mdash', # em dash
u'\u2018' : u'lsquo', # left single quotation mark
u'\u2019' : u'rsquo', # right single quotation mark
u'\u201A' : u'sbquo', # single low-9 quotation mark
u'\u201C' : u'ldquo', # left double quotation mark
u'\u201D' : u'rdquo', # right double quotation mark
u'\u201E' : u'bdquo', # double low-9 quotation mark
u'\u2020' : u'dagger', # dagger, obelisk
u'\u2021' : u'Dagger', # double dagger, double obelisk
u'\u2022' : u'bull', # bullet (= black small circle)[f]
u'\u2026' : u'hellip', # horizontal ellipsis (= three dot leader)
u'\u2030' : u'permil', # per mille sign
u'\u2032' : u'prime', # prime (= minutes = feet)
u'\u2033' : u'Prime', # double prime (= seconds = inches)
u'\u2039' : u'lsaquo', # single left-pointing angle quotation mark[g]
u'\u203A' : u'rsaquo', # single right-pointing angle quotation mark[g]
u'\u203E' : u'oline', # overline (= spacing overscore)
u'\u2044' : u'frasl', # fraction slash (= solidus)
u'\u20AC' : u'euro', # euro sign
u'\u2111' : u'image', # black-letter capital I (= imaginary part)
u'\u2118' : u'weierp', # script capital P (= power set = Weierstrass p)
u'\u211C' : u'real', # black-letter capital R (= real part symbol)
u'\u2122' : u'trade', # trademark symbol
u'\u2135' : u'alefsym', # alef symbol (= first transfinite cardinal)[h]
u'\u2190' : u'larr', # leftwards arrow
u'\u2191' : u'uarr', # upwards arrow
u'\u2192' : u'rarr', # rightwards arrow
u'\u2193' : u'darr', # downwards arrow
u'\u2194' : u'harr', # left right arrow
u'\u21B5' : u'crarr', # downwards arrow with corner leftwards (= carriage return)
u'\u21D0' : u'lArr', # leftwards double arrow[i]
u'\u21D1' : u'uArr', # upwards double arrow
u'\u21D2' : u'rArr', # rightwards double arrow[j]
u'\u21D3' : u'dArr', # downwards double arrow
u'\u21D4' : u'hArr', # left right double arrow
u'\u2200' : u'forall', # for all
u'\u2202' : u'part', # partial differential
u'\u2203' : u'exist', # there exists
u'\u2205' : u'empty', # empty set (= null set = diameter)
u'\u2207' : u'nabla', # nabla (= backward difference)
u'\u2208' : u'isin', # element of
u'\u2209' : u'notin', # not an element of
u'\u220B' : u'ni', # contains as member
u'\u220F' : u'prod', # n-ary product (= product sign)[k]
u'\u2211' : u'sum', # n-ary summation[l]
u'\u2212' : u'minus', # minus sign
u'\u2217' : u'lowast', # asterisk operator
u'\u221A' : u'radic', # square root (= radical sign)
u'\u221D' : u'prop', # proportional to
u'\u221E' : u'infin', # infinity
u'\u2220' : u'ang', # angle
u'\u2227' : u'and', # logical and (= wedge)
u'\u2228' : u'or', # logical or (= vee)
u'\u2229' : u'cap', # intersection (= cap)
u'\u222A' : u'cup', # union (= cup)
u'\u222B' : u'int', # integral
u'\u2234' : u'there4', # therefore sign
u'\u223C' : u'sim', # tilde operator (= varies with = similar to)[m]
u'\u2245' : u'cong', # congruent to
u'\u2248' : u'asymp', # almost equal to (= asymptotic to)
u'\u2260' : u'ne', # not equal to
u'\u2261' : u'equiv', # identical to; sometimes used for 'equivalent to'
u'\u2264' : u'le', # less-than or equal to
u'\u2265' : u'ge', # greater-than or equal to
u'\u2282' : u'sub', # subset of
u'\u2283' : u'sup', # superset of[n]
u'\u2284' : u'nsub', # not a subset of
u'\u2286' : u'sube', # subset of or equal to
u'\u2287' : u'supe', # superset of or equal to
u'\u2295' : u'oplus', # circled plus (= direct sum)
u'\u2297' : u'otimes', # circled times (= vector product)
u'\u22A5' : u'perp', # up tack (= orthogonal to = perpendicular)[o]
u'\u22C5' : u'sdot', # dot operator[p]
u'\u2308' : u'lceil', # left ceiling (= APL upstile)
u'\u2309' : u'rceil', # right ceiling
u'\u230A' : u'lfloor', # left floor (= APL downstile)
u'\u230B' : u'rfloor', # right floor
u'\u2329' : u'lang', # left-pointing angle bracket (= bra)[q]
u'\u232A' : u'rang', # right-pointing angle bracket (= ket)[r]
u'\u25CA' : u'loz', # lozenge
u'\u2660' : u'spades', # black spade suit[f]
u'\u2663' : u'clubs', # black club suit (= shamrock)[f]
u'\u2665' : u'hearts', # black heart suit (= valentine)[f]
u'\u2666' : u'diams' # black diamond suit[f]
        # NOTE(review): the closing brace of namedEntityDict is not visible
        # in this listing.
        # Identify the platform and operating system, in case we find a reason to care
        self.version_pyqt = PYQT_VERSION_STR # python string eg '4.9.5'
        self.version_qt = QT_VERSION_STR # python string e.g. '4.8.0'
        # The OS type is probed by reading static attributes of QSysInfo:
        # the first one that can be read decides between 'Win' and 'Mac',
        # and 'Linux' is the fallback.
        # NOTE(review): the `try:` lines that the two `except:` clauses
        # below pair with are not visible in this listing.
            junk = QSysInfo.WindowsVersion
            self.osType = 'Win'
        except: # that static var does not exist, so,
                junk = QSysInfo.MacintoshVersion
                self.osType = 'Mac'
            except: # that doesn't exist either, ergo:
                self.osType = 'Linux'
 def currentLocation(self):
     """Return the current cell's location in spreadsheet notation.

     The column index from ``self.currentColumn()`` is mapped to a letter
     counting up from 'A', and the row index from ``self.currentRow()`` is
     rendered one-based, so column 0 / row 0 yields "A1".
     """
     # 'A' + int raises TypeError in Python (the expression is a literal
     # port of C++ char arithmetic), so do the addition on the code point
     # and build the QChar from the resulting integer.
     column_letter = QChar(ord('A') + self.currentColumn())
     return column_letter + QString.number(self.currentRow() + 1)
Exemple #14
    # Ask the user for a test book file, load its text into the editor
    # widget, then walk the document line by line and register every page
    # separator line with the page table.
    fn = QFileDialog.getOpenFileName(None, "Select a Test Book")
    fh = QFile(fn)
    if not fh.open(QFile.ReadOnly):
        # Python 2 raise syntax; the message is Qt's error string for the file
        raise IOError, unicode(fh.errorString())
    stream = QTextStream(fh)
    IMC.editWidget.setPlainText(stream.readAll())  # load the editor doc
    # Code from pqEdit to parse a doc by lines and extract page seps.
    # Capture 1 is the text between "File: " and ".png"; capture 2 is the
    # run of backslash-delimited fields that follows "---".
    # NOTE(review): the QRegExp( call below is not closed in this copy; its
    # remaining argument(s) and closing parenthesis appear to be missing.
    reLineSep = QRegExp(
        u'-----File: ([^\\.]+)\\.png---((\\\\[^\\\\]*)+)\\\\-*',
    qtb = IMC.editWidget.document().begin()  # first text block
    while qtb != IMC.editWidget.document().end():  # up to end of document
        qsLine = qtb.text()  # text of line as qstring
        if reLineSep.exactMatch(qsLine):  # a page separator
            qsfilenum = reLineSep.capturedTexts()[1]
            qsproofers = reLineSep.capturedTexts()[2]
            # proofer names can contain spaces, replace with en-space char
            qsproofers.replace(QChar(" "), QChar(0x2002))
            # NOTE(review): the cursor is created on the document but is not
            # moved to this block in the visible code -- confirm whether a
            # positioning call was lost.
            tcursor = QTextCursor(IMC.editWidget.document())
            IMC.pageTable.loadPsep(tcursor, qsfilenum, qsproofers)
        # ignore non-seps
        qtb = qtb.next()
Exemple #15
    def test(self, mode='current_pos'):
        """Run the editor action named by ``mode`` on ``self.textedit``.

        The modes handled by the branches below are 'current_pos',
        'single_line_comment', 'get_range', 'load_range',
        'multi_line_comment', 'get_text_cur_pos', 'tab', 'shift_tab',
        'undo', 'delete_line' and 'execute'. Several branches record the
        adjusted selection in ``self.sel_range`` as (start_pos, end_pos).

        NOTE(review): many statements are missing from this copy (unclosed
        brackets, ``if`` headers without bodies, absent ``else`` lines), so
        the method does not parse as written; the inline notes mark the
        most visible gaps.
        """
        cursor = self.textedit.textCursor()

        if mode == 'current_pos':
            _logger.debug('current pos: %s', cursor.position())
        elif mode == 'single_line_comment':

            cursor.insertText(self.comment_str + ' ')

        elif mode == 'get_range':
            # NOTE(review): the list literal opened here is never closed in
            # this copy; the same gap recurs in every later branch that
            # assigns start_pos/end_pos from "[".
            start_pos, end_pos = [
            _logger.debug('range: %s %s', start_pos, end_pos)
            self.sel_range = (start_pos, end_pos)

        elif mode == 'load_range':
            start_pos, end_pos = self.sel_range
            _logger.debug('load range: %s %s', start_pos, end_pos)

            cursor = self.textedit.textCursor()
            cursor.movePosition(QTextCursor.Right, QTextCursor.KeepAnchor,
                                end_pos - start_pos)

        elif mode == 'multi_line_comment':
            # find selected lines
            start_pos, end_pos = [

            first_line, last_line = self.get_line_range(
                cursor, start_pos, end_pos)

            # If the selection contains only commented lines and surrounding
            # whitespace, uncomment. Otherwise, comment.
            is_comment_or_whitespace = True
            at_least_one_comment = False
            for _line_nb in range(first_line, last_line + 1):

                text = str(cursor.block().text()).lstrip()
                _logger.debug('dealing text: %s', text)

                is_comment = text.startswith(self.comment_str)
                is_whitespace = (text == '')
                # multiplying by a bool turns the flag into an int (0 or 1);
                # it is only used for its truth value further down
                is_comment_or_whitespace *= (is_comment or is_whitespace)
                if is_comment:
                    at_least_one_comment = True

            # NOTE(review): this call is not closed in this copy
            min_indent = self.get_min_indent(cursor, start_pos, first_line,


            if is_comment_or_whitespace and at_least_one_comment:
                _logger.debug('need to uncomment')

                for _line_nb in range(first_line, last_line + 1):

                    # NOTE(review): the body of this `if` is missing here
                    if self.is_empty_line(cursor):

                    if _line_nb == first_line:
                        if self.position_in_block(cursor) > min_indent:
                            start_pos -= len(self.comment_str) + 1


                                        QTextCursor.MoveAnchor, min_indent)

                    for i in range(len(self.comment_str)):
                        end_pos -= 1

                    next_char = cursor.block().text()[self.position_in_block(
                    _logger.debug('next char: %s', next_char)

                    if next_char == ' ':
                        end_pos -= 1


                    self.sel_range = (start_pos, end_pos)


                # NOTE(review): presumably an `else:` preceded this line (the
                # "comment" counterpart of the branch above) -- confirm.
                _logger.debug('need to comment')
                for _line_nb in range(first_line, last_line + 1):
                    if self.is_empty_line(cursor):

                    if _line_nb == first_line:

                        if self.position_in_block(cursor) >= min_indent:
                            start_pos += len(self.comment_str) + 1


                                        QTextCursor.MoveAnchor, min_indent)

                    # each commented line grows by the marker plus one space
                    cursor.insertText('%s ' % self.comment_str)
                    end_pos += len(self.comment_str) + 1


                    self.sel_range = (start_pos, end_pos)


        elif mode == 'get_text_cur_pos':
            _logger.debug('get cur pos text')
            cursor = self.textedit.textCursor()

            positionInBlock = self.position_in_block(cursor)
            _logger.debug('block pos: %s', positionInBlock)
            _logger.debug('block text: %s', cursor.block().text())

        elif mode == 'tab':
            # Python 2 print statement
            print 'tab'

            start_pos, end_pos = [

            # get last first line
            first_line, last_line = self.get_line_range(
                cursor, start_pos, end_pos)
            _logger.debug('tab key: %s %s', first_line,
                          last_line)  # range(first_line, last_line+1)

            start_pos += 4

            # indent 4 spaces
            for _line_nb in range(first_line, last_line + 1):

                # skip empty line
                if len(str(cursor.block().text()).strip(
                        ' ')) or first_line == last_line:

                    cursor.insertText(' ' * 4)
                    end_pos += 4


            self.sel_range = (start_pos, end_pos)


        elif mode == 'shift_tab':

            start_pos, end_pos = [

            # get last first line
            first_line, last_line = self.get_line_range(
                cursor, start_pos, end_pos)

            for _line_nb in range(first_line, last_line + 1):


                if _line_nb == first_line:

                    # on the first line both ends of the selection shift left
                    # for every leading space found (up to 4)
                    for i in range(4):
                        next_char = cursor.block().text()[
                        if next_char == ' ':
                            start_pos -= 1
                            end_pos -= 1

                    # NOTE(review): presumably the non-first-line case; an
                    # `else:` appears to be missing before this loop.
                    for i in range(4):
                        next_char = cursor.block().text()
                        if next_char:
                            next_char = next_char[self.position_in_block(
                        if next_char == ' ':
                            end_pos -= 1


            self.sel_range = (start_pos, end_pos)


        elif mode == 'undo':
            _logger.debug('undo')  # self.sel_range
            # if self.last_change=='toggle_comment':
            # self.toggle_comment()

            # drop any remembered selection range
            if getattr(self, 'sel_range', None):
                self.sel_range = None

        elif mode == 'delete_line':

            start_pos, end_pos = [

            # get last first line
            first_line, last_line = sorted(
                self.get_line_range(cursor, start_pos, end_pos))

            # walk the lines bottom-up
            for _line_nb in reversed(range(first_line, last_line + 1)):

                # NOTE(review): this call is not closed and the rest of the
                # loop body is missing in this copy
                text_block = self.textedit.document().findBlockByLineNumber(



        elif mode == 'execute':
            # if self.lastnge=='toggle_comment':
            # self.toggle_comment()
            # if something selected, run it
            start_pos, end_pos = [

            run_all = 0
            if start_pos == end_pos:
                # run all
                cmd = self.textedit.toPlainText()
                run_all = 1
                # NOTE(review): presumably an `else:` preceded the next line;
                # as written it overwrites the whole-document text -- confirm.
                cmd = cursor.selectedText()

            # get first "non-empty" line content
            cursor.movePosition(QTextCursor.Start, QTextCursor.MoveAnchor)
            # NOTE(review): the call below is not closed in this copy
            cursor.movePosition(QTextCursor.Right, QTextCursor.MoveAnchor,
            first_line = str(cursor.block().text())

            # number of leading spaces on that line
            spaces = len(first_line) - len(first_line.lstrip(' '))
            start_pos_in_block = self.position_in_block(cursor)

            if not run_all:
                if start_pos_in_block < spaces:
                    # fill spaces
                    cmd = ' ' * start_pos_in_block + cmd
                    # NOTE(review): presumably an `else:` preceded the next
                    # line; as written both prefixes are applied -- confirm.
                    cmd = ' ' * spaces + cmd

            # replace U+2029 (Unicode paragraph separator) with newlines
            cmd = str(cmd.replace(QChar(0x2029), QChar('\n')))

            cmd = self.dedent_code(cmd)
            # print 'will run cmd:\n',cmd
Exemple #16
 def doCensus(self, page=False) :
     """Scan the document, tallying characters and word-like tokens.

     Walks the text blocks from the start of the document (skipping ahead
     to the line starting with ``<body`` when the book type starts with
     "htm" and the first line starts with ``<!DOCTYPE``). Lines matching
     ``reLineSep`` are page separators; every other line has its
     characters counted and is tokenized with ``reTokens``.

     page -- when true, each page-separator line's file number and proofer
             string are passed to ``IMC.pageTable.loadPsep``; when false,
             separator lines are ignored.

     On completion sets ``IMC.needSpellCheck`` to True, ``IMC.staleCensus``
     to 0 and ORs ``IMC.wordlistsChanged`` into ``IMC.needMetadataSave``.

     NOTE(review): several statements appear to be missing from this copy
     (see the inline notes), so the function does not parse as written.
     """
     global reLineSep, reTokens, reLang, qcLess
     # Clear the current census values
     # NOTE(review): no clearing statements follow the comment above in this
     # copy.
     # Count chars locally for speed
     local_char_census = defaultdict(int)
     # Name of current alternate dictionary
     alt_dict = QString() # isEmpty when none
     # Tag from which we set an alternate dict
     alt_dict_tag = QString()
     # Start the progress bar based on the number of lines in the document
     pqMsgs.startBar(self.document().blockCount(),"Counting words and chars...")
     # Find the first text block of interest, skipping an HTML header file
     qtb = self.document().begin() # first text block
     if IMC.bookType.startsWith(QString(u"htm")) \
     and qtb.text().startsWith(QString(u"<!DOCTYPE")) :
         while (qtb != self.document().end()) \
         and (not qtb.text().startsWith(QString(u"<body"))) :
             qtb = qtb.next()
     # Scan all lines of the document to the end.
     while qtb != self.document().end() :
         qsLine = qtb.text() # text of line as qstring
         # NOTE(review): dbg and dbg2 are not read anywhere in the visible code
         dbg = qsLine.size()
         dbg2 = qtb.length()
         if reLineSep.exactMatch(qsLine): # this is a page separator line
             if page :
                 # We are doing page seps, it's for Open with no .meta seen,
                 # the page table has been cleared. Store the page sep
                 # data in the page table, with a textCursor to its start.
                 qsfilenum = reLineSep.cap(1) # xxx from "File: xxx.png"
                 qsproofers = reLineSep.cap(2) # \who\x\blah\etc
                 # proofer names can contain spaces, replace with en-space char
                 qsproofers.replace(QChar(" "),QChar(0x2002))
                 # create a new TextCursor instance
                 tcursor = QTextCursor(self.document())
                 # point it to this text block
                 # NOTE(review): no statement positions tcursor in this copy;
                 # the line the comment above describes appears to be missing.
                 # dump all that in the page table
                 IMC.pageTable.loadPsep(tcursor, qsfilenum, qsproofers)
             # else not doing pages, just ignore this psep line
         else: # not psep, ordinary text line, count chars and words
             pyLine = unicode(qsLine) # move into Python space to count
             for c in pyLine :
                 local_char_census[c] += 1
             # j is the offset in qsLine at which the next token search starts
             j = 0
             while True:
                 j = reTokens.indexIn(qsLine,j)
                 # NOTE(review): the body of this `if` (presumably the loop
                 # exit) is missing in this copy.
                 if j < 0 : # no more word-like units
                 qsWord = reTokens.cap(0)
                 j += qsWord.size()
                 if qsWord.startsWith(qcLess) :
                     # Examine a captured HTML production.
                     if not reTokens.cap(2).isEmpty() :
                         # HTML open tag, look for lang='dict'
                         if 0 <= reLang.indexIn(reTokens.cap(3)) :
                             # found it: save tag and dict name
                             alt_dict_tag = QString(reTokens.cap(2))
                             alt_dict = QString(reLang.cap(1))
                             alt_dict.prepend(u'/') # make "/en_GB"
                         # else no lang= attribute
                         # NOTE(review): presumably an `else:` (the close-tag
                         # case) preceded the lines below -- confirm.
                         # HTML close tag, see if it closes alt dict use
                         if reTokens.cap(5) == alt_dict_tag :
                             # yes, matches open-tag for dict, clear it
                             alt_dict_tag = QString()
                             alt_dict = QString()
                         # else no alt dict in use, or didn't match
                 else : # did not start with "<", process as a word
                     # Set the property flags, which is harder now we don't
                     # look at every character. Use the QString facilities
                     # rather than python because python .isalnum fails
                     # for a hyphenated number "1850-1910".
                     flag = 0
                     # differs from its lowercased form => has an uppercase char
                     if 0 != qsWord.compare(qsWord.toLower()) :
                         flag |= IMC.WordHasUpper
                     # differs from its uppercased form => has a lowercase char
                     if 0 != qsWord.compare(qsWord.toUpper()) :
                         flag |= IMC.WordHasLower
                     if qsWord.contains(qcHyphen) :
                         flag |= IMC.WordHasHyphen
                     if qsWord.contains(qcApostrophe) or qsWord.contains(qcCurlyApostrophe) :
                         flag |= IMC.WordHasApostrophe
                     if qsWord.contains(reDigit) :
                         flag |= IMC.WordHasDigit
                     # NOTE(review): flag is computed but not stored anywhere
                     # in the visible code; a census call appears to be missing.
             # end "while any more words in this line"
         # end of not-a-psep-line processing
         qtb = qtb.next() # move on to next block
         if (0 == (qtb.blockNumber() & 255)) : #every 256th block
             pqMsgs.rollBar(qtb.blockNumber()) # roll the bar
     # end of scanning all text blocks in the doc
     # we accumulated the char counts in local_char_census. Now read it out
     # in sorted order and stick it in the IMC.charCensus list.
     for one_char in sorted(local_char_census.keys()):
         qc = QChar(ord(one_char)) # get to QChar for category() method
         # NOTE(review): qc is not used after this point in the visible code;
         # the statement that stores the count appears to be missing.
     IMC.needSpellCheck = True # after a census this is true
     IMC.staleCensus = 0 # but this is no longer true
     IMC.needMetadataSave |= IMC.wordlistsChanged
Exemple #17
 def doCensus(self, page=False):
     """Scan the document, tallying characters and word-like tokens.

     Walks the text blocks from the start of the document (skipping ahead
     to the line starting with ``<body`` when the book type starts with
     "htm" and the first line starts with ``<!DOCTYPE``). Lines matching
     ``reLineSep`` are page separators; every other line has its
     characters counted and is tokenized with ``reTokens``. Each word
     token, with the current alternate-dictionary suffix appended, is
     passed with its property flags to ``IMC.wordCensus.count``.

     page -- when true, each page-separator line's file number and proofer
             string are passed to ``IMC.pageTable.loadPsep``; when false,
             separator lines are ignored.

     On completion sets ``IMC.needSpellCheck`` to True, ``IMC.staleCensus``
     to 0 and ORs ``IMC.wordlistsChanged`` into ``IMC.needMetadataSave``.

     NOTE(review): several statements appear to be missing from this copy
     (see the inline notes), so the function does not parse as written.
     """
     global reLineSep, reTokens, reLang, qcLess
     # Clear the current census values
     # NOTE(review): no clearing statements follow the comment above in this
     # copy.
     # Count chars locally for speed
     local_char_census = defaultdict(int)
     # Name of current alternate dictionary
     alt_dict = QString()  # isEmpty when none
     # Tag from which we set an alternate dict
     alt_dict_tag = QString()
     # Start the progress bar based on the number of lines in the document
     # NOTE(review): the next line is an orphaned continuation; the call it
     # belongs to is missing in this copy.
                     "Counting words and chars...")
     # Find the first text block of interest, skipping an HTML header file
     qtb = self.document().begin()  # first text block
     if IMC.bookType.startsWith(QString(u"htm")) \
     and qtb.text().startsWith(QString(u"<!DOCTYPE")) :
         while (qtb != self.document().end()) \
         and (not qtb.text().startsWith(QString(u"<body"))) :
             qtb = qtb.next()
     # Scan all lines of the document to the end.
     while qtb != self.document().end():
         qsLine = qtb.text()  # text of line as qstring
         # NOTE(review): dbg and dbg2 are not read anywhere in the visible code
         dbg = qsLine.size()
         dbg2 = qtb.length()
         if reLineSep.exactMatch(qsLine):  # this is a page separator line
             if page:
                 # We are doing page seps, it's for Open with no .meta seen,
                 # the page table has been cleared. Store the page sep
                 # data in the page table, with a textCursor to its start.
                 qsfilenum = reLineSep.cap(1)  # xxx from "File: xxx.png"
                 qsproofers = reLineSep.cap(2)  # \who\x\blah\etc
                 # proofer names can contain spaces, replace with en-space char
                 qsproofers.replace(QChar(" "), QChar(0x2002))
                 # create a new TextCursor instance
                 tcursor = QTextCursor(self.document())
                 # point it to this text block
                 # NOTE(review): no statement positions tcursor in this copy;
                 # the line the comment above describes appears to be missing.
                 # dump all that in the page table
                 IMC.pageTable.loadPsep(tcursor, qsfilenum, qsproofers)
             # else not doing pages, just ignore this psep line
         else:  # not psep, ordinary text line, count chars and words
             pyLine = unicode(qsLine)  # move into Python space to count
             for c in pyLine:
                 local_char_census[c] += 1
             # j is the offset in qsLine at which the next token search starts
             j = 0
             while True:
                 j = reTokens.indexIn(qsLine, j)
                 # NOTE(review): the body of this `if` (presumably the loop
                 # exit) is missing in this copy.
                 if j < 0:  # no more word-like units
                 qsWord = reTokens.cap(0)
                 j += qsWord.size()
                 if qsWord.startsWith(qcLess):
                     # Examine a captured HTML production.
                     if not reTokens.cap(2).isEmpty():
                         # HTML open tag, look for lang='dict'
                         if 0 <= reLang.indexIn(reTokens.cap(3)):
                             # found it: save tag and dict name
                             alt_dict_tag = QString(reTokens.cap(2))
                             alt_dict = QString(reLang.cap(1))
                             alt_dict.prepend(u'/')  # make "/en_GB"
                         # else no lang= attribute
                         # NOTE(review): presumably an `else:` (the close-tag
                         # case) preceded the lines below -- confirm.
                         # HTML close tag, see if it closes alt dict use
                         if reTokens.cap(5) == alt_dict_tag:
                             # yes, matches open-tag for dict, clear it
                             alt_dict_tag = QString()
                             alt_dict = QString()
                         # else no alt dict in use, or didn't match
                 else:  # did not start with "<", process as a word
                     # Set the property flags, which is harder now we don't
                     # look at every character. Use the QString facilities
                     # rather than python because python .isalnum fails
                     # for a hyphenated number "1850-1910".
                     flag = 0
                     # differs from its lowercased form => has an uppercase char
                     if 0 != qsWord.compare(qsWord.toLower()):
                         flag |= IMC.WordHasUpper
                     # differs from its uppercased form => has a lowercase char
                     if 0 != qsWord.compare(qsWord.toUpper()):
                         flag |= IMC.WordHasLower
                     if qsWord.contains(qcHyphen):
                         flag |= IMC.WordHasHyphen
                     # NOTE(review): the condition below is not closed in this
                     # copy; its second operand is missing.
                     if qsWord.contains(qcApostrophe) or qsWord.contains(
                         flag |= IMC.WordHasApostrophe
                     if qsWord.contains(reDigit):
                         flag |= IMC.WordHasDigit
                     # count the word under its dictionary-qualified spelling
                     # (alt_dict is empty unless a lang= tag is open)
                     IMC.wordCensus.count(qsWord.append(alt_dict), flag)
             # end "while any more words in this line"
         # end of not-a-psep-line processing
         qtb = qtb.next()  # move on to next block
         if (0 == (qtb.blockNumber() & 255)):  #every 256th block
             pqMsgs.rollBar(qtb.blockNumber())  # roll the bar
     # end of scanning all text blocks in the doc
     # we accumulated the char counts in local_char_census. Now read it out
     # in sorted order and stick it in the IMC.charCensus list.
     for one_char in sorted(local_char_census.keys()):
         qc = QChar(ord(one_char))  # get to QChar for category() method
         # NOTE(review): the call below is not closed in this copy; its
         # remaining argument(s) are missing.
         IMC.charCensus.append(QString(qc), local_char_census[one_char],
     IMC.needSpellCheck = True  # after a census this is true
     IMC.staleCensus = 0  # but this is no longer true
     IMC.needMetadataSave |= IMC.wordlistsChanged
Exemple #18
# HTML end tag, not allowing for any attributes (or spaces).
# Two capture groups: the whole "</name>" and, inside it, the tag name.

xp_end = '''(</(\w+)>)'''

# Put it all together: a token is any of those three things
# (xp_start and xp_hyap are defined earlier in the module, outside this
# excerpt):

xp_any = xp_start + '|' + xp_end + '|' + xp_hyap

reTokens = QRegExp(xp_any, Qt.CaseInsensitive)

# When reTokens matches an HTML close tag, reTokens.cap(5) is the closed tag name.
# When reTokens matches an HTML open tag, reTokens.cap(2) is the tag name
# ("i" or "span" or "div"), reTokens.cap(3) has whatever attributes it had
# (class='x', lang='en_GB'). We scan that for lang='value' (optional quotes).
# NOTE(review): cap(5) being the close-tag name implies xp_start contributes
# exactly three capture groups -- confirm against its definition.

reLang = QRegExp(u'''lang=[\\'\\"]*([\\w\\-]+)[\\'\\"]*''')

# The 'value' captured as reLang.cap(1) is a language designation but we require
# it to be a dictionary tag such as 'en_US' or 'fr_FR'. It is not clear from
# the W3C docs whether all (or any) of our dic tags are language designations.

# According to W3C (http://www.w3.org/TR/html401/struct/dirlang.html) you can
# put lang= into any tag, esp. span, para, div, td, and so forth.
# We save the dict tag as an alternate dictionary for all words until the
# matching close tag is seen.

# Single-character constants and a digit pattern used when flagging words.
reDigit = QRegExp(u'\\d')
qcLess = QChar(u"<")
qcHyphen = QChar(u"-")
qcApostrophe = QChar(u"'")
qcCurlyApostrophe = QChar(8217)  # aka \u2019