def wordSplit(word, maxWidths, fontName, fontSize, encoding='utf8'):
    """Break a word which lacks spaces into pieces fitting the given widths.

    This is a thin wrapper intended for language- and user-choice-specific
    splitting algorithms; it should only be called after line breaking on
    spaces.  Bytes input is decoded with *encoding*, measured with
    getCharWidths, split with dumbSplit, and the pieces re-encoded so the
    caller gets back the same string type it passed in.

    Courier characters should be 6 points wide.
    >>> wordSplit('HelloWorld', 30, 'Courier', 10)
    [[0.0, 'Hello'], [0.0, 'World']]
    >>> wordSplit('HelloWorld', 31, 'Courier', 10)
    [[1.0, 'Hello'], [1.0, 'World']]
    """
    wasBytes = not isUnicode(word)
    uword = word.decode(encoding) if wasBytes else word

    lines = dumbSplit(uword, getCharWidths(uword, fontName, fontSize), maxWidths)

    if wasBytes:
        # hand back the caller's original encoding
        lines = [[extraSpace, text.encode(encoding)]
                 for extraSpace, text in lines]
    return lines
def wordSplit(word, maxWidths, fontName, fontSize, encoding='utf8'):
    """Attempt to split an unspaced word so the first part fits the space left.

    Wrapper for language-specific splitting; call it only after the usual
    space-based line breaking.  Accepts str or bytes and returns pieces of
    the same type the caller supplied.

    Courier characters should be 6 points wide.
    >>> wordSplit('HelloWorld', 30, 'Courier', 10)
    [[0.0, 'Hello'], [0.0, 'World']]
    >>> wordSplit('HelloWorld', 31, 'Courier', 10)
    [[1.0, 'Hello'], [1.0, 'World']]
    """
    if isUnicode(word):
        uword = word
    else:
        uword = word.decode(encoding)

    widths = getCharWidths(uword, fontName, fontSize)
    result = dumbSplit(uword, widths, maxWidths)

    if not isUnicode(word):
        # caller gave us bytes, so hand bytes back
        encoded = []
        for extraSpace, text in result:
            encoded.append([extraSpace, text.encode(encoding)])
        result = encoded

    return result
Beispiel #3
0
    def reset(self):
        """Restore the cipher to its start state (RC4-style key scheduling)."""
        # Fill k with the key bytes cycled mod 256 and sbox with the
        # identity permutation 0..255; sbox is then mixed below.
        key = self._key
        if isUnicode(key):
            key = key.encode("utf8")
        sbox = list(range(256))
        k = list(range(256))
        lk = len(key)
        if isPy3:
            # py3: indexing bytes yields ints directly
            for i in sbox:
                k[i] = key[i % lk] % 256
        else:
            # py2: indexing a str yields 1-char strings, hence ord()
            for i in sbox:
                k[i] = ord(key[i % lk]) % 256

        # Re-order sbox using the private key schedule k: for each element
        # recompute the counter j, then swap sbox[i] and sbox[j].
        j = 0
        for i in range(256):
            j = (j + sbox[i] + k[i]) % 256
            sbox[i], sbox[j] = sbox[j], sbox[i]
        self._sbox, self._i, self._j = sbox, 0, 0
Beispiel #4
0
 def testUtf8FileName(self):
     """Check a PDF can be created under a filename containing non-ASCII."""
     name = outputfile('test_pdfbase_utf8_filename')
     if not isUnicode(name):
         name = name.decode('utf8')
     name += u'_portr\xe4t.pdf'
     canvas = Canvas(name)
     canvas.drawString(100, 700, u'Filename=' + name)
     canvas.save()
Beispiel #5
0
def process(datafile, notes=0, handout=0, printout=0, cols=0, verbose=0, outDir=None, datafilename=None, fx=1):
    """Process one PythonPoint source file.

    datafile may be a path or an open file-like object; datafilename is
    used for error reporting and defaults to the path (or "PseudoFile").
    The raw bytes are decoded using the encoding declared in the XML
    prolog when present, falling back to utf8 then iso-8859-1, then
    validated and handed to _process.
    """
    weOpened = not hasattr(datafile, "read")
    if weOpened:
        if not datafilename: datafilename = datafile
        datafile = open(datafile, 'rb')
    else:
        if not datafilename: datafilename = "PseudoFile"
    try:
        rawdata = datafile.read()
    finally:
        if weOpened:
            # close only files we opened ourselves; callers own theirs
            datafile.close()
    if not isUnicode(rawdata):
        encs = ['utf8', 'iso-8859-1']
        # honour an explicit encoding="..." declared in the XML prolog
        m = _re_match(r'^\s*(<\?xml[^>]*\?>)', rawdata)
        if m:
            m1 = _re_match(r"""^.*\sencoding\s*=\s*("[^"]*"|'[^']*')""", m.group(1))
            if m1:
                enc = m1.group(1)[1:-1]
                if enc:
                    if enc in encs:
                        encs.remove(enc)
                    encs.insert(0, enc)
        for enc in encs:
            try:
                udata = rawdata.decode(enc)
                break
            except (UnicodeDecodeError, LookupError):
                # try the next candidate; LookupError covers a bogus
                # encoding name declared in the prolog
                pass
        else:
            raise ValueError('cannot decode input data')
    else:
        udata = rawdata
    rawdata = udata

    #if pyRXP present, use it to check and get line numbers for errors...
    validate(rawdata)
    return _process(rawdata, datafilename, notes, handout, printout, cols, verbose, outDir, fx)
def pygments2xpre(s, language="python"):
    "Return markup suitable for XPreformatted"
    try:
        from pygments import highlight
        from pygments.formatters import HtmlFormatter
    except ImportError:
        # pygments not installed: return the input unchanged
        return s

    from pygments.lexers import get_lexer_by_name

    rconv = lambda x: x
    if isPy3:
        out = getStringIO()
    else:
        # py2 pygments works on bytes; convert and remember to convert back
        if isUnicode(s):
            s = asBytes(s)
            rconv = asUnicode
        out = getBytesIO()

    lexer = get_lexer_by_name(language)

    formatter = HtmlFormatter()
    highlight(s, lexer, formatter, out)
    # collect the colour assigned to each style class for _2xpre to map
    styles = []
    for cls, (style, ttype, level) in formatter.class2style.items():
        if cls and style and style.startswith('color:'):
            styles.append((cls, style.split(';')[0].split(':')[1].strip()))
    return rconv(_2xpre(out.getvalue(), styles))
Beispiel #7
0
 def instanceStringWidthTTF(self, text, size, encoding='utf-8'):
     """Return the width of text, in points, at the given font size."""
     if not isUnicode(text):
         text = text.decode(encoding or 'utf-8')
     charWidths = self.face.charWidths
     default = self.face.defaultWidth
     total = 0
     for ch in text:
         total += charWidths.get(ord(ch), default)
     return 0.001 * size * total
Beispiel #8
0
 def instanceStringWidthT1(self, text, size, encoding='utf8'):
     """This is the "purist" approach to width"""
     if not isUnicode(text):
         text = text.decode(encoding)
     # sum per-character widths over every (font, chunk) pair that
     # unicode2T1 maps the text onto, then scale by size
     total = 0
     for font, chunk in unicode2T1(text, [self] + self.substitutionFonts):
         widths = font.widths
         for code in map(ord, chunk):
             total += widths[code]
     return total * 0.001 * size
Beispiel #9
0
 def instanceStringWidthTTF(self, text, size, encoding='utf-8'):
     """Width of text in points: per-glyph advances summed and scaled by size."""
     if not isUnicode(text):
         text = text.decode(encoding or 'utf-8')
     lookup = self.face.charWidths.get
     missing = self.face.defaultWidth
     return 0.001 * size * sum(lookup(ord(ch), missing) for ch in text)
 def testUtf8FileName(self):
     """PDF creation should work with a non-ASCII output filename."""
     path = outputfile('test_pdfbase_utf8_filename')
     if not isUnicode(path): path = path.decode('utf8')
     path = path + u'_portr\xe4t.pdf'
     doc = Canvas(path)
     doc.drawString(100, 700, u'Filename=' + path)
     doc.save()
Beispiel #11
0
 def unicode2T1(utext,fonts):
     '''return a list of (font,string) pairs representing the unicode text

     fonts is a priority-ordered list; slices the first font cannot encode
     are retried against the remaining (substitution) fonts, and characters
     no font can encode are rendered with the notdef glyph.
     '''
     R = []
     font, fonts = fonts[0], fonts[1:]
     enc = font.encName
     if 'UCS-2' in enc:
         enc = 'UTF16'
     while utext:
         try:
             if isUnicode(utext):
                 s = utext.encode(enc)
             else:
                 # already bytes: pass through untouched
                 s = utext
             R.append((font,s))
             break
         except UnicodeEncodeError as e:
             # e.args[2:4] give the [i0:il) slice that failed to encode
             i0, il = e.args[2:4]
             if i0:
                 # everything before the failure encodes in this font
                 R.append((font,utext[:i0].encode(enc)))
             if fonts:
                 # let the substitution fonts try the offending slice
                 R.extend(unicode2T1(utext[i0:il],fonts))
             else:
                 # no fallback left: substitute the notdef character
                 R.append((font._notdefFont,font._notdefChar*(il-i0)))
             utext = utext[il:]
     return R
Beispiel #12
0
    def _issueT1String(self,fontObj,x,y,s):
        """Emit PostScript drawing s at (x, y), switching to substitution
        fonts for characters fontObj cannot encode."""
        fc = fontObj
        code_append = self.code_append
        fontSize = self._fontSize
        fontsUsed = self._fontsUsed
        escape = self._escape
        if not isUnicode(s):
            try:
                s = s.decode('utf8')
            except UnicodeDecodeError as e:
                # re-raise with a window of context around the bad bytes
                i,j = e.args[2:4]
                raise UnicodeDecodeError(*(e.args[:4]+('%s\n%s-->%s<--%s' % (e.args[4],s[i-10:i],s[i:j],s[j:j+10]),)))

        for f, t in unicode2T1(s,[fontObj]+fontObj.substitutionFonts):
            if f!=fc:
                # chunk needs a different font: issue setfont and record
                # its use for the document's font list
                psName = asNative(f.face.name)
                code_append('(%s) findfont %s scalefont setfont' % (psName,fp_str(fontSize)))
                if psName not in fontsUsed:
                    fontsUsed.append(psName)
                fc = f
            code_append('%s m (%s) show ' % (fp_str(x,y),escape(t)))
            # advance x by the width of the chunk just shown
            x += f.stringWidth(t.decode(f.encName),fontSize)
        if fontObj!=fc:
            # we ended on a substitution font: restore the caller's font
            self._font = None
            self.setFont(fontObj.face.name,fontSize)
Beispiel #13
0
    def _issueT1String(self, fontObj, x, y, s):
        """Emit PostScript that draws s at (x, y), switching to
        substitution fonts for characters fontObj cannot encode."""
        fc = fontObj
        code_append = self.code_append
        fontSize = self._fontSize
        fontsUsed = self._fontsUsed
        escape = self._escape
        if not isUnicode(s):
            try:
                s = s.decode('utf8')
            except UnicodeDecodeError as e:
                # re-raise with a window of context around the bad bytes
                i, j = e.args[2:4]
                raise UnicodeDecodeError(
                    *(e.args[:4] +
                      ('%s\n%s-->%s<--%s' %
                       (e.args[4], s[i - 10:i], s[i:j], s[j:j + 10]), )))

        for f, t in unicode2T1(s, [fontObj] + fontObj.substitutionFonts):
            if f != fc:
                # chunk needs a different font: issue setfont and record
                # its use for the document's font resource list
                psName = asNative(f.face.name)
                code_append('(%s) findfont %s scalefont setfont' %
                            (psName, fp_str(fontSize)))
                if psName not in fontsUsed:
                    fontsUsed.append(psName)
                fc = f
            code_append('%s m (%s) show ' % (fp_str(x, y), escape(t)))
            # advance x by the width of the chunk just shown
            x += f.stringWidth(t.decode(f.encName), fontSize)
        if fontObj != fc:
            # we ended on a substitution font: restore the caller's font
            self._font = None
            self.setFont(fontObj.face.name, fontSize)
Beispiel #14
0
 def unicode2T1(utext, fonts):
     '''return a list of (font,string) pairs representing the unicode text

     The first font in fonts is tried first; slices it cannot encode are
     passed to the remaining fonts, and the notdef glyph is emitted when
     nothing can encode a character.
     '''
     R = []
     font, fonts = fonts[0], fonts[1:]
     enc = font.encName
     if 'UCS-2' in enc:
         enc = 'UTF16'
     while utext:
         try:
             if isUnicode(utext):
                 s = utext.encode(enc)
             else:
                 # already bytes: take it as-is
                 s = utext
             R.append((font, s))
             break
         except UnicodeEncodeError as e:
             # the exception records the [i0:il) slice that failed
             i0, il = e.args[2:4]
             if i0:
                 # the prefix up to the failure encodes fine in this font
                 R.append((font, utext[:i0].encode(enc)))
             if fonts:
                 # recurse with the substitution fonts for the bad slice
                 R.extend(unicode2T1(utext[i0:il], fonts))
             else:
                 # nothing can encode it: use the notdef character
                 R.append((font._notdefFont, font._notdefChar * (il - i0)))
             utext = utext[il:]
     return R
Beispiel #15
0
def writeXML(tree):
    """Convert a tag tree to a unicode string.  No auto-indenting provided yet.

    tree is either a unicode string (returned unchanged) or a 4-tuple
    (tagName, attrs, children, spare) which is serialised recursively.
    """
    if isUnicode(tree):
        return tree
    else:
        (tagName, attrs, children, spare) = tree
        chunks = []
        # bug fix: the element must open with its tag name, not the whole
        # tuple, and the serialised result must actually be returned
        chunks.append(u'<%s' % tagName)
        if attrs:
            # assumes attrs is a mapping of name -> value -- TODO confirm
            for name, value in attrs.items():
                chunks.append(u' %s="%s"' % (name, value))
        if children:
            chunks.append(u'>')
            for child in children:
                chunks.append(writeXML(child))
            chunks.append(u'</%s>' % tagName)
        else:
            chunks.append(u'/>')
        return u''.join(chunks)
Beispiel #16
0
def writeXML(tree):
    """Convert a tag tree to a unicode string.  No auto-indenting provided yet.

    tree is either a unicode string (returned unchanged) or a 4-tuple
    (tagName, attrs, children, spare) which is serialised recursively.
    """
    if isUnicode(tree):
        return tree
    else:
        (tagName, attrs, children, spare) = tree
        chunks = []
        # bug fix: the element must open with its tag name, not the whole
        # tuple, and the serialised result must actually be returned
        chunks.append(u'<%s' % tagName)
        if attrs:
            # assumes attrs is a mapping of name -> value -- TODO confirm
            for name, value in attrs.items():
                chunks.append(u' %s="%s"' % (name, value))
        if children:
            chunks.append(u'>')
            for child in children:
                chunks.append(writeXML(child))
            chunks.append(u'</%s>' % tagName)
        else:
            chunks.append(u'/>')
        return u''.join(chunks)
Beispiel #17
0
 def resolve(self, text, enc='utf8'):
     """Feed text through the parser and return the resolved result.

     Bytes input is decoded with enc and the result re-encoded to match;
     unicode input yields unicode output."""
     self._output = []
     self.reset()
     if isUnicode(text):
         # remember not to re-encode on the way out
         enc = None
     else:
         text = text.decode(enc)
     self.feed(nakedAmpFix(text).replace(u'<br/>',u'<br />'))
     resolved = u''.join(self._output)
     if enc:
         return resolved.encode(enc)
     return resolved
Beispiel #18
0
 def resolve(self, text, enc='utf8'):
     """Run text through the parser, preserving the caller's str/bytes type."""
     self._output = []
     self.reset()
     wasBytes = not isUnicode(text)
     if wasBytes:
         text = text.decode(enc)
     self.feed(nakedAmpFix(text).replace(u'<br/>', u'<br />'))
     joined = u''.join(self._output)
     # re-encode only when the caller originally handed us bytes
     return joined.encode(enc) if wasBytes else joined
Beispiel #19
0
 def asUnicode(self, markup):
     """Return markup as unicode, decoding bytes as utf8 with a cp1252 fallback."""
     if isUnicode(markup):
         return markup
     try:
         return markup.decode('utf8', 'strict')
     except UnicodeDecodeError:
         # not valid utf8 -- assume a Windows (cp1252) encoding instead
         return markup.decode('cp1252', 'replace')
    def drawString(self, x, y, text, _fontInfo=None, text_anchor='left'):
        """Draw text at (x, y), honouring text_anchor ('left', 'middle',
        'end' or 'numeric') and an optional (fontSize, fontName) override
        in _fontInfo.  The graphics-state font is always restored."""
        gs = self._gs
        gs_fontSize = gs.fontSize
        gs_fontName = gs.fontName
        if _fontInfo and _fontInfo != (gs_fontSize, gs_fontName):
            fontName, fontSize = _fontInfo
            _setFont(gs, fontName, fontSize)
        else:
            fontName = gs_fontName
            fontSize = gs_fontSize

        try:
            # bug fix: this tuple previously read ('end', 'middle', 'end'),
            # so the 'numeric' branch below was unreachable
            if text_anchor in ('end', 'middle', 'numeric'):
                textLen = stringWidth(text, fontName, fontSize)
                if text_anchor == 'end':
                    x -= textLen
                elif text_anchor == 'middle':
                    x -= textLen / 2.
                elif text_anchor == 'numeric':
                    x -= numericXShift(text_anchor, text, textLen, fontName,
                                       fontSize)

            if self._backend == 'rlPyCairo':
                gs.drawString(x, y, text)
            else:
                font = getFont(fontName)
                if font._dynamicFont:
                    # dynamic (TTF) fonts render unicode directly
                    gs.drawString(x, y, text)
                else:
                    fc = font
                    if not isUnicode(text):
                        try:
                            text = text.decode('utf8')
                        except UnicodeDecodeError as e:
                            # re-raise with context around the bad bytes
                            i, j = e.args[2:4]
                            raise UnicodeDecodeError(
                                *(e.args[:4] +
                                  ('%s\n%s-->%s<--%s' %
                                   (e.args[4], text[i - 10:i], text[i:j],
                                    text[j:j + 10]), )))

                    # draw each (font, chunk) run, switching fonts as needed
                    FT = unicode2T1(text, [font] + font.substitutionFonts)
                    n = len(FT)
                    nm1 = n - 1
                    for i in range(n):
                        f, t = FT[i]
                        if f != fc:
                            _setFont(gs, f.fontName, fontSize)
                            fc = f
                        gs.drawString(x, y, t)
                        if i != nm1:
                            # advance by the chunk width except after the last
                            x += f.stringWidth(t.decode(f.encName), fontSize)
        finally:
            # always restore the font the graphics state started with
            gs.setFont(gs_fontName, gs_fontSize)
Beispiel #21
0
def _AsciiHexEncode(input):
    """ASCII-Hex encode data for embedding in a PDF stream.

    A verbose encoding used for binary data within a PDF file: one byte
    of binary becomes two bytes of ASCII.  Helper function used by images."""
    if isUnicode(input):
        input = input.encode('utf-8')
    # hex-encode the payload and append the ASCII-Hex terminator
    return binascii.b2a_hex(input) + b'>'
Beispiel #22
0
def _AsciiHexEncode(input):
    """Encode input as an ASCII-Hex stream.

    Verbose encoding for binary data inside a PDF file; each input byte
    becomes two hex characters, closed with the '>' terminator.
    Helper function used by images."""
    data = input.encode('utf-8') if isUnicode(input) else input
    buf = getBytesIO()
    buf.write(binascii.b2a_hex(data))
    buf.write(b'>')
    return buf.getvalue()
 def write(self,u):
     """Append u to the writer; bytes arguments must be valid utf-8."""
     if isBytes(u):
         try:
             decoded = u.decode('utf-8')
         except:
             # keep only the error message; drop the traceback objects
             et, ev, tb = sys.exc_info()
             ev = str(ev)
             del et, tb
             raise ValueError("String %r not encoded as 'utf-8'\nerror=%s" % (u,ev))
         u = decoded
     elif not isUnicode(u):
         raise ValueError("EncodedWriter.write(%s) argument should be 'utf-8' bytes or str" % ascii(u))
     self.append(u)
Beispiel #24
0
 def write(self,u):
     """Store u, decoding utf-8 bytes; reject any other type."""
     if not isBytes(u):
         if not isUnicode(u):
             raise ValueError("EncodedWriter.write(%s) argument should be 'utf-8' bytes or str" % ascii(u))
         self.append(u)
         return
     try:
         u = u.decode('utf-8')
     except:
         # keep only the error message; drop the traceback objects
         et, ev, tb = sys.exc_info()
         ev = str(ev)
         del et, tb
         raise ValueError("String %r not encoded as 'utf-8'\nerror=%s" % (u,ev))
     self.append(u)
Beispiel #25
0
 def splitString(self, text, doc, encoding='utf-8'):
     """Splits text into a number of chunks, each of which belongs to a
     single subset.  Returns a list of tuples (subset, string).  Use subset
     numbers with getSubsetInternalName.  Doc is needed for distinguishing
     subsets when building different documents at the same time."""
     asciiReadable = self._asciiReadable
     try:
         state = self.state[doc]
     except KeyError:
         # first use of this font for doc: start fresh subset state
         state = self.state[doc] = TTFont.State(asciiReadable)
     curSet = -1
     cur = []
     results = []
     if not isUnicode(text):
         text = text.decode('utf-8')  # encoding defaults to utf-8
     assignments = state.assignments
     subsets = state.subsets
     for code in map(ord, text):
         if code in assignments:
             n = assignments[code]
         else:
             if state.frozen:
                 raise pdfdoc.PDFError(
                     "Font %s is already frozen, cannot add new character U+%04X"
                     % (self.fontName, code))
             n = state.nextCode
             if n & 0xFF == 32:
                 # make code 32 always be a space character
                 if n != 32: subsets[n >> 8].append(32)
                 state.nextCode += 1
                 n = state.nextCode
             state.nextCode += 1
             assignments[code] = n
             if n > 32:
                 # low byte wrapped to 0: open a brand-new subset list
                 if not (n & 0xFF): subsets.append([])
                 subsets[n >> 8].append(code)
             else:
                 subsets[0][n] = code
         if (n >> 8) != curSet:
             # crossing into a different subset: flush the pending chunk
             if cur:
                 results.append(
                     (curSet,
                      bytes(cur) if isPy3 else ''.join(chr(c)
                                                       for c in cur)))
             curSet = (n >> 8)
             cur = []
         cur.append(n & 0xFF)
     if cur:
         results.append(
             (curSet, bytes(cur) if isPy3 else ''.join(chr(c)
                                                       for c in cur)))
     return results
Beispiel #26
0
def _AsciiHexDecode(input):
    """Decode an ASCII-Hex stream (the inverse of _AsciiHexEncode).

    Not used except to provide a test of the inverse function."""
    if not isUnicode(input):
        input = input.decode('utf-8')
    # drop all whitespace before examining the stream
    compact = ''.join(input.split())
    assert compact[-1] == '>', 'Invalid terminator for Ascii Hex Stream'
    body = compact[:-1]
    assert len(body) % 2 == 0, 'Ascii Hex stream has odd number of bytes'
    chars = []
    for pos in range(0, len(body), 2):
        chars.append(chr(int(body[pos:pos + 2], 16)))
    return ''.join(chars)
Beispiel #27
0
def _AsciiHexDecode(input):
    """Reverse ASCII-Hex coding; exists only to test the encoder.

    Not used except to provide a test of the inverse function."""
    text = input if isUnicode(input) else input.decode('utf-8')
    stripped = ''.join(text.split())   # remove every whitespace character
    assert stripped[-1] == '>', 'Invalid terminator for Ascii Hex Stream'
    stripped = stripped[:-1]  # chop off terminator
    assert len(stripped) % 2 == 0, 'Ascii Hex stream has odd number of bytes'
    pairs = (stripped[k:k + 2] for k in range(0, len(stripped), 2))
    return ''.join(chr(int(p, 16)) for p in pairs)
Beispiel #28
0
    def __init__(self, value='Hello World', **kw):
        # value may be a unicode string or a list of QR data items;
        # isUnicodeOrQRList.normalize coerces it to one of those forms
        self.value = isUnicodeOrQRList.normalize(value)
        for k, v in kw.items():
            setattr(self, k, v)

        ec_level = getattr(qrencoder.QRErrorCorrectLevel, self.barLevel)

        # NOTE(review): stored via __dict__ directly, presumably to bypass
        # custom attribute machinery used by setattr above -- confirm
        self.__dict__['qr'] = qrencoder.QRCode(self.qrVersion, ec_level)

        if isUnicode(self.value):
            self.addData(self.value)
        elif self.value:
            # a list of QR data items: add each in turn
            for v in self.value:
                self.addData(v)
Beispiel #29
0
    def __init__(self, value='Hello World', **kw):
        # value may be a unicode string or a list of QR data items;
        # isUnicodeOrQRList.normalize coerces it to one of those forms
        self.value = isUnicodeOrQRList.normalize(value)
        for k, v in kw.items():
            setattr(self, k, v)

        ec_level = getattr(qrencoder.QRErrorCorrectLevel, self.barLevel)

        # NOTE(review): stored via __dict__ directly, presumably to bypass
        # custom attribute machinery used by setattr above -- confirm
        self.__dict__['qr'] = qrencoder.QRCode(self.qrVersion, ec_level)

        if isUnicode(self.value):
            self.addData(self.value)
        elif self.value:
            # a list of QR data items: add each in turn
            for v in self.value:
                self.addData(v)
 def normalizeName(self, name):
     """Return name lower-cased and stripped, optionally re-spaced.

     Bytes names are decoded as utf8 with a latin1 fallback; when
     self.normalizeNameSpaces is a string it replaces internal runs of
     whitespace."""
     if not isUnicode(name):
         decoded = None
         for enc in ('utf8', 'latin1'):
             try:
                 decoded = asUnicode(name, enc)
                 break
             except:
                 pass
         if decoded is None:
             raise ValueError('Cannot normalize name %r' % name)
         name = decoded
     normalized = name.strip().lower()
     spacer = getattr(self, 'normalizeNameSpaces', None)
     if isStr(spacer):
         normalized = spacer.join(filter(None, normalized.split()))
     return normalized
def parse_catalog(filename):
    """Validate and parse XML.  This will complain if invalid

    We fully parse the XML and turn into Python variables, so that any encoding
    issues are confronted here rather than in the template
    """
    # read and close the file promptly instead of leaking the handle
    with open(filename) as f:
        xml = f.read()
    if isUnicode(xml):
        xml = xml.encode('utf8')  #required for python 2.7 & >=3.3
    p = pyRXPU.Parser()
    tree = p.parse(xml)
    tagTree = TagWrapper(tree)
    request_a_quote = [109, 110, 4121, 4122, 4123]
    # we now need to de-duplicate; the query returns multiple rows with different images
    # in them.  if id is same, assume it's the same product.
    ids_seen = set()
    products = []
    for prodTag in tagTree:
        prod_id = int(str(prodTag.ProductId1))  #extract tag content
        if prod_id in ids_seen:
            continue
        ids_seen.add(prod_id)
        prod = Product()
        prod.id = prod_id
        prod.modelNumber = int(str(prodTag.ModelNumber))
        prod.archived = (str(prodTag.Archived) == 'true')
        prod.name = fix(prodTag.ModelName)
        prod.summary = fix(prodTag.Summary)
        prod.description = fix(prodTag.Description)

        #originally the images came from a remote site.  We have stashed them in
        #the img/ subdirectory, so just chop off the final part of the path.
        #asNative required for python 2.7 & >=3.3
        prod.image = os.path.split(asNative(fix(
            prodTag.ImageUrl)))[-1].replace(' ', '')

        if prod.modelNumber in request_a_quote:
            prod.price = "Call us on 01635 246830 for a quote"
        else:
            # drop the final two characters (pence) from the unit cost
            prod.price = '&pound;' + str(
                prodTag.UnitCost)[0:len(str(prodTag.UnitCost)) - 2]
        if not prod.archived:
            products.append(prod)
    products.sort(key=lambda x: x.modelNumber)
    return products
Beispiel #32
0
 def splitString(self, text, doc, encoding='utf-8'):
     """Splits text into a number of chunks, each of which belongs to a
     single subset.  Returns a list of tuples (subset, string).  Use subset
     numbers with getSubsetInternalName.  Doc is needed for distinguishing
     subsets when building different documents at the same time."""
     asciiReadable = self._asciiReadable
     # per-document subset state, created lazily on first use
     try: state = self.state[doc]
     except KeyError: state = self.state[doc] = TTFont.State(asciiReadable)
     curSet = -1
     cur = []
     results = []
     if not isUnicode(text):
         text = text.decode('utf-8')     # encoding defaults to utf-8
     assignments = state.assignments
     subsets = state.subsets
     for code in map(ord,text):
         if code in assignments:
             n = assignments[code]
         else:
             if state.frozen:
                 raise pdfdoc.PDFError("Font %s is already frozen, cannot add new character U+%04X" % (self.fontName, code))
             n = state.nextCode
             if n&0xFF==32:
                 # make code 32 always be a space character
                 if n!=32: subsets[n >> 8].append(32)
                 state.nextCode += 1
                 n = state.nextCode
             state.nextCode += 1
             assignments[code] = n
             if n>32:
                 # low byte wrapped to 0: open a brand-new subset list
                 if not(n&0xFF): subsets.append([])
                 subsets[n >> 8].append(code)
             else:
                 subsets[0][n] = code
         if (n >> 8) != curSet:
             # crossing into a different subset: flush the pending chunk
             if cur:
                 results.append((curSet,bytes(cur) if isPy3 else ''.join(chr(c) for c in cur)))
             curSet = (n >> 8)
             cur = []
         cur.append(n & 0xFF)
     if cur:
         results.append((curSet,bytes(cur) if isPy3 else ''.join(chr(c) for c in cur)))
     return results
Beispiel #33
0
    def reset(self):
        '''Restore the cipher to its start state (key scheduling).'''
        key = self._key
        if isUnicode(key):
            key = key.encode('utf8')
        # k holds the key bytes cycled to length 256 (mod 256); sbox
        # starts as the identity permutation 0..255
        lk = len(key)
        k = [key[n % lk] % 256 for n in range(256)]
        sbox = list(range(256))

        # mix sbox with the key schedule: recompute the counter j for each
        # element and swap sbox[i] with sbox[j]
        j = 0
        for i in range(256):
            j = (j + sbox[i] + k[i]) % 256
            sbox[i], sbox[j] = sbox[j], sbox[i]
        self._sbox, self._i, self._j = sbox, 0, 0
Beispiel #34
0
    def drawString(self, x, y, text, _fontInfo=None):
        """Draw text at (x, y) with the current (or overridden) font.

        _fontInfo, when given, is a (fontName, fontSize) pair overriding
        the graphics state.  Non-dynamic fonts are split into
        (font, chunk) runs via unicode2T1, switching fonts per run.
        """
        gs = self._gs
        if _fontInfo:
            fontName, fontSize = _fontInfo
        else:
            fontSize = gs.fontSize
            fontName = gs.fontName
        # removed: a dead try/except that bound getFont(gs.fontName) to an
        # unused local (gfont) and swallowed all errors
        font = getFont(fontName)
        if font._dynamicFont:
            # dynamic (TTF) fonts render unicode directly
            gs.drawString(x, y, text)
        else:
            fc = font
            if not isUnicode(text):
                try:
                    text = text.decode("utf8")
                except UnicodeDecodeError as e:
                    # re-raise with context around the offending bytes
                    i, j = e.args[2:4]
                    raise UnicodeDecodeError(
                        *(
                            e.args[:4]
                            + ("%s\n%s-->%s<--%s" % (e.args[4], text[i - 10 : i], text[i:j], text[j : j + 10]),)
                        )
                    )

            # draw each (font, chunk) run, switching fonts as required
            FT = unicode2T1(text, [font] + font.substitutionFonts)
            n = len(FT)
            nm1 = n - 1
            for i in range(n):
                f, t = FT[i]
                if f != fc:
                    _setFont(gs, f.fontName, fontSize)
                    fc = f
                gs.drawString(x, y, t)
                if i != nm1:
                    # advance by the chunk's width except after the last run
                    x += f.stringWidth(t.decode(f.encName), fontSize)
            if font != fc:
                # a substitution font is active: restore the requested one
                _setFont(gs, fontName, fontSize)
0
    def drawString(self, x, y, text, _fontInfo=None):
        """Draw text at (x, y); _fontInfo optionally overrides the
        graphics state's (fontName, fontSize)."""
        gs = self._gs
        if _fontInfo:
            fontName, fontSize = _fontInfo
        else:
            fontSize = gs.fontSize
            fontName = gs.fontName
        try:
            # NOTE(review): gfont is never used below -- this lookup looks
            # like dead code unless getFont has a required side effect; confirm
            gfont = getFont(gs.fontName)
        except:
            gfont = None
        font = getFont(fontName)
        if font._dynamicFont:
            # dynamic (TTF) fonts render unicode directly
            gs.drawString(x, y, text)
        else:
            fc = font
            if not isUnicode(text):
                try:
                    text = text.decode('utf8')
                except UnicodeDecodeError as e:
                    # re-raise with context around the offending bytes
                    i, j = e.args[2:4]
                    raise UnicodeDecodeError(
                        *(e.args[:4] + ('%s\n%s-->%s<--%s' %
                                        (e.args[4], text[i - 10:i], text[i:j],
                                         text[j:j + 10]), )))

            # split into (font, chunk) runs, switching fonts as required
            FT = unicode2T1(text, [font] + font.substitutionFonts)
            n = len(FT)
            nm1 = n - 1
            for i in range(n):
                f, t = FT[i]
                if f != fc:
                    _setFont(gs, f.fontName, fontSize)
                    fc = f
                gs.drawString(x, y, t)
                if i != nm1:
                    # advance x by the chunk's width except after the last
                    x += f.stringWidth(t.decode(f.encName), fontSize)
            if font != fc:
                # a substitution font is active: restore the requested one
                _setFont(gs, fontName, fontSize)
Beispiel #36
0
def process(datafile, notes=0, handout=0, printout=0, cols=0, verbose=0, outDir=None, datafilename=None, fx=1):
    """Process one PythonPoint source file.

    datafile may be a path or an open file-like object; datafilename is
    used for error reporting and defaults to the path (or "PseudoFile").
    The raw bytes are decoded using the encoding declared in the XML
    prolog when present, falling back to utf8 then iso-8859-1, then
    validated and handed to _process.
    """
    weOpened = not hasattr(datafile, "read")
    if weOpened:
        if not datafilename: datafilename = datafile
        datafile = open(datafile, 'rb')
    else:
        if not datafilename: datafilename = "PseudoFile"
    try:
        rawdata = datafile.read()
    finally:
        if weOpened:
            # close only files we opened ourselves; callers own theirs
            datafile.close()
    if not isUnicode(rawdata):
        encs = ['utf8', 'iso-8859-1']
        # honour an explicit encoding="..." declared in the XML prolog
        m = _re_match(r'^\s*(<\?xml[^>]*\?>)', rawdata)
        if m:
            m1 = _re_match(r"""^.*\sencoding\s*=\s*("[^"]*"|'[^']*')""", m.group(1))
            if m1:
                enc = m1.group(1)[1:-1]
                if enc:
                    if enc in encs:
                        encs.remove(enc)
                    encs.insert(0, enc)
        for enc in encs:
            try:
                udata = rawdata.decode(enc)
                break
            except (UnicodeDecodeError, LookupError):
                # try the next candidate; LookupError covers a bogus
                # encoding name declared in the prolog
                pass
        else:
            raise ValueError('cannot decode input data')
    else:
        udata = rawdata
    if isPy3:
        rawdata = udata
    else:
        # py2 pipeline expects utf8-encoded bytes downstream
        rawdata = udata.encode('utf8')

    #if pyRXP present, use it to check and get line numbers for errors...
    validate(rawdata)
    return _process(rawdata, datafilename, notes, handout, printout, cols, verbose, outDir, fx)
Beispiel #37
0
def tt2xml(tt):
    '''convert tuple tree form to unicode xml

    tt may be None (-> ''), bytes (utf8-decoded then re-processed),
    unicode text (escaped), a list of nodes (concatenated), or a tuple
    (tag, attrs, children, ...) serialised as an element.
    '''
    if tt is None: return ''
    if isBytes(tt):
        return tt2xml(tt.decode('utf8'))
    if isUnicode(tt):
        return escape(tt)
    if isinstance(tt,list):
        return ''.join(tt2xml(x) for x in tt)
    if isinstance(tt,tuple):
        tag = tt[0].decode('utf8')
        # L is the list's bound append method; L.__self__ recovers the list
        L=['<'+tag].append
        C = tt[2]
        if tt[1]:
            for k,v in tt[1].items():
                # NOTE(review): calling .decode() on the formatted str will
                # fail on python 3 (str has no decode) -- this branch looks
                # py2-only; confirm attribute keys/values are bytes here
                L((' %s=%s' % (k,quoteattr(v))).decode('utf8'))
        if C is not None:
            L('>')
            L(tt2xml(C))
            L('</'+tag+'>')
        else:
            L('/>')
        return ''.join(L.__self__)
    raise ValueError('Invalid value %r passed to tt2xml' % tt)
 def __getitem__(self, x):
     """Index like TagWrapper but wrap string results in FakingStr; on
     any failure return an empty FakingStr matching tagName's type."""
     try:
         value = TagWrapper.__getitem__(self, x)
         if isinstance(value, strTypes):
             return FakingStr(value)
         return value
     except:
         return FakingStr(u'' if isUnicode(self.tagName) else '')
def dumbSplit(word, widths, maxWidths):
    """This function attempts to fit as many characters as possible into the available
    space, cutting "like a knife" between characters.  This would do for Chinese.
    It returns a list of (text, extraSpace) items where text is a Unicode string,
    and extraSpace is the points of unused space available on the line.  This is a
    structure which is fairly easy to display, and supports 'backtracking' approaches
    after the fact.

    Test cases assume each character is ten points wide...

    >>> dumbSplit(u'Hello', [10]*5, 60)
    [[10, u'Hello']]
    >>> dumbSplit(u'Hello', [10]*5, 50)
    [[0, u'Hello']]
    >>> dumbSplit(u'Hello', [10]*5, 40)
    [[0, u'Hell'], [30, u'o']]
    """
    #Disabled doctests parked in a throwaway local so they are not executed.
    _more = """
    #>>> dumbSplit(u'Hello', [10]*5, 4)   # less than one character
    #(u'', u'Hello')
    # this says 'Nihongo wa muzukashii desu ne!' (Japanese is difficult isn't it?) in 12 characters
    >>> jtext = u'\u65e5\u672c\u8a9e\u306f\u96e3\u3057\u3044\u3067\u3059\u306d\uff01'
    >>> dumbSplit(jtext, [10]*11, 30)   #
    (u'\u65e5\u672c\u8a9e', u'\u306f\u96e3\u3057\u3044\u3067\u3059\u306d\uff01')
    """
    #a scalar maxWidth means every line gets the same available width
    if not isinstance(maxWidths,(list,tuple)): maxWidths = [maxWidths]
    assert isUnicode(word)
    lines = []
    i = widthUsed = lineStartPos = 0
    maxWidth = maxWidths[0]
    nW = len(word)
    while i<nW:
        w = widths[i]
        c = word[i]
        widthUsed += w
        i += 1
        if widthUsed > maxWidth + _FUZZ and widthUsed>0:
            #line overflowed: extraSpace starts negative by the overshoot
            #and is corrected below as characters are pushed back
            extraSpace = maxWidth - widthUsed
            if ord(c)<0x3000:
                # we appear to be inside a non-Asian script section.
                # (this is a very crude test but quick to compute).
                # This is likely to be quite rare so the speed of the
                # code below is hopefully not a big issue.  The main
                # situation requiring this is that a document title
                # with an english product name in it got cut.


                # we count back and look for
                #  - a space-like character
                #  - reversion to Kanji (which would be a good split point)
                #  - in the worst case, roughly half way back along the line
                limitCheck = (lineStartPos+i)>>1        #(arbitrary taste issue)
                for j in range(i-1,limitCheck,-1):
                    cj = word[j]
                    if category(cj)=='Zs' or ord(cj)>=0x3000:
                        k = j+1
                        if k<i:
                            #NOTE(review): k is the first char after the break;
                            #j is then rebound to k+1 and i to j, so w/c track
                            #word[k] while word[k+1:] returns to the stream.
                            #Looks like a deliberate off-by-one — confirm
                            #before changing.
                            j = k+1
                            extraSpace += sum(widths[j:i])
                            w = widths[k]
                            c = word[k]
                            i = j
                            break

                #end of English-within-Asian special case

            #we are pushing this character back, but
            #the most important of the Japanese typography rules
            #if this character cannot start a line, wrap it up to this line so it hangs
            #in the right margin. We won't do two or more though - that's unlikely and
            #would result in growing ugliness.
            #and increase the extra space
            #bug fix contributed by Alexander Vasilenko <*****@*****.**>
            if c not in ALL_CANNOT_START and i>lineStartPos+1:
                #otherwise we need to push the character back
                #the i>lineStart+1 condition ensures progress
                i -= 1
                extraSpace += w

            #lines.append([maxWidth-sum(widths[lineStartPos:i]), word[lineStartPos:i].strip()])
            lines.append([extraSpace, word[lineStartPos:i].strip()])
            try:
                maxWidth = maxWidths[len(lines)]
            except IndexError:
                maxWidth = maxWidths[-1]  # use the last one
            lineStartPos = i
            widthUsed = 0

    #any characters left?
    if widthUsed > 0:
        lines.append([maxWidth - widthUsed, word[lineStartPos:]])

    return lines
Beispiel #40
0
 def _test(self, x):
     """Return True for unicode input, or when every element of *x* is a qrencoder.QR."""
     if isUnicode(x):
         return True
     return all(isinstance(v, qrencoder.QR) for v in x)
def _processLine(line, sep=',', conv=0):
    """Split one CSV-like line into a list of fields.

    Works on unicode or bytes input (sep and the literal constants are
    coerced to match), honours double-quoted fields with doubled-quote
    escaping, and strips unquoted whitespace around fields.  With conv
    true, fields parseable as int or float are converted.

    Bug fix: single characters are now taken with one-byte slices
    (line[p:p+1]); on Python 3 indexing a bytes object yields an int,
    which never compared equal to b' ', so space-skipping silently
    failed for bytes input.
    """
    if isUnicode(line):
        space = u' '
        dquot = u'"'
        empty = u''
        speol = u' \r\n'
        sep = asUnicode(sep)
    else:
        space = b' '
        dquot = b'"'
        empty = b''
        speol = b' \r\n'
        sep = asBytes(sep)
    fields = []
    p = 0
    ll = len(line)
    ls = len(sep)
    line += space   #sentinel so slices just past the end are safe
    #trim trailing spaces and line endings
    while (ll > 0 and (line[ll-1] in speol)): ll -= 1

    while p < ll:
        #Skip unquoted space at the start of a field
        while p<ll and line[p:p+1]==space: p += 1

        field = empty
        ql = 0      #inside-quotes flag
        while p < ll:
            #Skip unquoted space at the end of a field
            if ql == 0 and line[p:p+1] == space:
                q = p
                while q < ll and line[q:q+1] == space:
                    q += 1
                if q >= ll:
                    break
                elif line[q:q+ls] == sep:
                    p = q
            if ql == 0 and line[p:p+ls] == sep:
                break
            elif line[p:p+1] == dquot:
                if ql == 0:
                    ql = 1
                elif line[p+1:p+2]==dquot:
                    field += dquot      #doubled quote -> literal quote
                    p += 1
                else:
                    ql = 0
            else:
                field += line[p:p+1]
            p += 1
        p += ls
        if conv:
            try:
                fields.append(int(field))
            except ValueError:
                try:
                    fields.append(float(field))
                except ValueError:
                    fields.append(field)
        else:
            fields.append(field)
    if line[ll-ls:ll]==sep:
        fields.append(empty)    #extra field when there's a separator at the end

    return fields
Beispiel #42
0
    def asciiBase85Encode(input):
        """Encodes input using ASCII-Base85 coding.

        This is a compact encoding used for binary data within
        a PDF file.  Four bytes of binary data become five bytes of
        ASCII.  This is the default method used for encoding images.

        Accepts bytes or str; the result is always a str ending with
        the Base85 terminator '~>'.
        """
        #if indexing yields 1-char strings (py2, or str input) we must ord() them
        doOrd = not isPy3 or isUnicode(input)
        # special rules apply if not a multiple of four bytes.
        whole_word_count, remainder_size = divmod(len(input), 4)
        cut = 4 * whole_word_count
        body, lastbit = input[0:cut], input[cut:]

        out = [].append     #bound append; the list itself is out.__self__
        for i in range(whole_word_count):
            offset = i * 4
            b1 = body[offset]
            b2 = body[offset + 1]
            b3 = body[offset + 2]
            b4 = body[offset + 3]
            if doOrd:
                b1 = ord(b1)
                b2 = ord(b2)
                b3 = ord(b3)
                b4 = ord(b4)

            #pack the four bytes big-endian into one 32-bit number
            if b1 < 128:
                num = (((((b1 << 8) | b2) << 8) | b3) << 8) | b4
            else:
                num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4

            if num == 0:
                #special case
                out('z')
            else:
                #solve for five base-85 numbers
                temp, c5 = divmod(num, 85)
                temp, c4 = divmod(temp, 85)
                temp, c3 = divmod(temp, 85)
                c1, c2 = divmod(temp, 85)
                assert ((85**4) * c1) + ((85**3) * c2) + (
                    (85**2) * c3) + (85 * c4) + c5 == num, 'dodgy code!'
                out(chr(c1 + 33))
                out(chr(c2 + 33))
                out(chr(c3 + 33))
                out(chr(c4 + 33))
                out(chr(c5 + 33))

        # now we do the final bit at the end.  I repeated this separately as
        # the loop above is the time-critical part of a script, whereas this
        # happens only once at the end.

        #encode however many bytes we have as usual
        if remainder_size > 0:
            #zero-pad the short group to a full four bytes
            lastbit += (4 - len(lastbit)) * ('\0' if doOrd else b'\000')
            b1 = lastbit[0]
            b2 = lastbit[1]
            b3 = lastbit[2]
            b4 = lastbit[3]
            if doOrd:
                b1 = ord(b1)
                b2 = ord(b2)
                b3 = ord(b3)
                b4 = ord(b4)

            num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4

            #solve for c1..c5
            temp, c5 = divmod(num, 85)
            temp, c4 = divmod(temp, 85)
            temp, c3 = divmod(temp, 85)
            c1, c2 = divmod(temp, 85)

            #print 'encoding: %d %d %d %d -> %d -> %d %d %d %d %d' % (
            #    b1,b2,b3,b4,num,c1,c2,c3,c4,c5)
            lastword = chr(c1 + 33) + chr(c2 + 33) + chr(c3 + 33) + chr(
                c4 + 33) + chr(c5 + 33)
            #write out most of the bytes - only remainder_size+1 are significant
            out(lastword[0:remainder_size + 1])

        #terminator code for ascii 85
        out('~>')
        return ''.join(out.__self__)
Beispiel #43
0
 def instanceStringWidthT1(self, text, size, encoding='utf8'):
     """Compute the width of *text* at *size* points by summing per-glyph
     T1 widths across this font and its substitution fonts ("purist" way)."""
     if not isUnicode(text):
         text = text.decode(encoding)
     total = 0
     for f, t in unicode2T1(text, [self] + self.substitutionFonts):
         total += sum(map(f.widths.__getitem__, t))
     #widths are in thousandths of an em
     return total * 0.001 * size
Beispiel #44
0
def preProcess(tree, nameSpace, caller=None):
    """Expands the parsed tree in the namespace and return new one.
    Returns a single tag-tuple in most cases, but a list of them
    if processing a loop node.

    tree      -- a (tagName, attrs, children, extraStuff) tuple-tree node
    nameSpace -- dict used as the namespace for embedded expressions/scripts
    caller    -- 'rml' selects RML-specific doclet search locations

    SECURITY NOTE: attribute values starting with '$', loop/assign/expr
    attributes and script bodies are passed to eval()/exec() in
    nameSpace, so templates must come from a trusted source only.
    """
    from reportPackages.rlextra.radxml import xmlutils
    #expand this into a class with methods for each tag it handles.
    #then I can put logic tags in one and data access in another.
    tagName, attrs, children, extraStuff = tree

    #any attribute as $name becomes the value of name
    #tags might be nested in a loop, and if so then
    #each dictionary must be a fresh copy rather than
    # a pointer to the same dict
    newAttrs = attrs.copy() if attrs is not None else {}
    for key, value in list(newAttrs.items()):
        if isinstance(value,str) and value[0:1] == '$':
            newValue = eval(value[1:], nameSpace)
            newAttrs[key] = newValue
    attrs = newAttrs

    if tagName in TAG_LOOP:
        innerTxt = attrs[TAG_LOOP_INNER]
        outer = eval(attrs[TAG_LOOP_OUTER], nameSpace)
        dataSet = []
        for row in outer:
            nameSpace['__loop_inner__'] = row
            rl_exec((innerTxt + " = __loop_inner__\n"), nameSpace)
            #at this point we're making lots of child nodes.
            # the attribute dictionary of each should be a copy, not
            # a reference
            newChildren = processChildren(children, nameSpace)
            if newChildren is not None:
                dataSet.extend(newChildren)
        return dataSet

    elif tagName in TAG_ASSIGN:
        name = attrs[TAG_ASSIGN_NAME]
        valueStr = attrs[TAG_ASSIGN_VALUE]
        try:
            value = eval(valueStr, nameSpace)
        except SyntaxError:  #must be a string
            value = valueStr
        nameSpace[name] = value
        return None

    elif tagName in TAG_SCRIPT:
        code = children[0]
        if not code.endswith('\n'): code += '\n'
        try:
            rl_exec(code, nameSpace)
        except SyntaxError:
            raise SyntaxError("Error with following script in xpreppy:\n\n%s" % code)
        return None

    elif tagName in TAG_EXPR:
        exprText = children[0]
        assert isinstance(exprText,strTypes), "expr can only contain strings"

        #attributes may affect escaping
        escape = attrs.get(u'escape', None)
        encoding = attrs.get(u'encoding',u'utf8')

        exprValue = eval(exprText, nameSpace)
        #normalise the result to unicode before any escaping
        if isBytes(exprValue):
            exprValue = exprValue.decode(encoding)
        elif isUnicode(exprValue):
            pass
        else:
            exprValue = asUnicodeEx(exprValue)

        if escape in (u'CDATA',u'CDATAESCAPE'):
            exprValue = u'<![CDATA[%s]]>' % exprValue
            #CDATAESCAPE deliberately falls through to the xmlEscape below
            if escape==u'CDATA': return [exprValue]
        elif escape == u'off':
            return [asUnicodeEx(exprValue)]
        elif escape == u'unescape':
            return [xmlutils.unescape(exprValue, ENTITY_SUBSTITUTIONS_DRAWSTRING_DICT)]
        return [xmlEscape(exprValue)]

    elif tagName in TAG_IF:
        condText = attrs[u'cond']
        yesOrNo = eval(condText, nameSpace)
        if yesOrNo:
            return processChildren(children, nameSpace)

    elif tagName in TAG_SWITCH:
        #two modes, with and without top level variable
        exprText = attrs.get(u'expr',u'')

        if exprText:
            expr = eval(exprText, nameSpace)

        selected = None
        for child in children:
            if isinstance(child,tuple):
                (childTagName, childAttrs, grandChildren, stuff) = child
                if childTagName in TAG_CASE:
                    condition = childAttrs[u'condition']
                    if exprText:
                        #check if it equals the value
                        try:
                            value = eval(condition, nameSpace)
                        except NameError:
                            value = condition # assume a string
                        if (expr == value):
                            selected = processChildren(grandChildren, nameSpace)
                            break
                    else:
                        #they gave us a full condition, evaluate it
                        yes = eval(condition, nameSpace)
                        if yes:
                            selected = processChildren(grandChildren, nameSpace)
                            break
                elif childTagName in TAG_DEFAULT:
                    selected = processChildren(grandChildren, nameSpace)
                    break
                else:
                    #bug fix: the original format string had a single %s for
                    #two arguments, raising TypeError instead of this message
                    raise ValueError('%s tag may only contain these tags: %s' % (TAG_SWITCH, ', '.join(TAG_CASE+TAG_DEFAULT)))

        return selected

    elif tagName in TAG_ACQUIRE:
        #all children will be data fetchers
        xacquire.acquireData(children, nameSpace)
        return None

    elif tagName in TAG_DOCLET:
        #pull out args needed to initialize
        dirName = attrs.get(u"baseDir", None)
        moduleName = attrs[u"module"]
        className = attrs[u"class"]
        dataStr = attrs.get(u"data", None)

        #load module, import and create it
        if caller == 'rml':
            from reportPackages.rlextra.rml2pdf.rml2pdf import _rml2pdf_locations
            locations = _rml2pdf_locations(dirName)
        else:
            locations = dirName
        m = recursiveImport(moduleName, locations)
        klass = getattr(m, className)
        docletObj = klass()

        #give it the data model
        if dataStr:
            dataObj = eval(dataStr, nameSpace)
        else:
            dataObj = nameSpace

        docletObj.setData(dataObj)

        #hide it in the tree so RML can see the object
        attrs[u'__doclet__'] = docletObj

        #return the tag otherwise unmodified
        return (tagName, attrs, children, extraStuff)

    else:
        newChildren = processChildren(children, nameSpace)
        return (tagName, attrs, newChildren, extraStuff)
def dumbSplit(word, widths, maxWidths):
    """This function attempts to fit as many characters as possible into the available
    space, cutting "like a knife" between characters.  This would do for Chinese.
    It returns a list of (text, extraSpace) items where text is a Unicode string,
    and extraSpace is the points of unused space available on the line.  This is a
    structure which is fairly easy to display, and supports 'backtracking' approaches
    after the fact.

    Test cases assume each character is ten points wide...

    >>> dumbSplit(u'Hello', [10]*5, 60)
    [[10, u'Hello']]
    >>> dumbSplit(u'Hello', [10]*5, 50)
    [[0, u'Hello']]
    >>> dumbSplit(u'Hello', [10]*5, 40)
    [[0, u'Hell'], [30, u'o']]
    """
    #Disabled doctests parked in a throwaway local so they are not executed.
    _more = """
    #>>> dumbSplit(u'Hello', [10]*5, 4)   # less than one character
    #(u'', u'Hello')
    # this says 'Nihongo wa muzukashii desu ne!' (Japanese is difficult isn't it?) in 12 characters
    >>> jtext = u'\u65e5\u672c\u8a9e\u306f\u96e3\u3057\u3044\u3067\u3059\u306d\uff01'
    >>> dumbSplit(jtext, [10]*11, 30)   #
    (u'\u65e5\u672c\u8a9e', u'\u306f\u96e3\u3057\u3044\u3067\u3059\u306d\uff01')
    """
    #a scalar maxWidth means every line gets the same available width
    if not isinstance(maxWidths,(list,tuple)): maxWidths = [maxWidths]
    assert isUnicode(word)
    lines = []
    i = widthUsed = lineStartPos = 0
    maxWidth = maxWidths[0]
    nW = len(word)
    while i<nW:
        w = widths[i]
        c = word[i]
        widthUsed += w
        i += 1
        if widthUsed > maxWidth + _FUZZ and widthUsed>0:
            #line overflowed: extraSpace starts negative by the overshoot
            #and is corrected below as characters are pushed back
            extraSpace = maxWidth - widthUsed
            if ord(c)<0x3000:
                # we appear to be inside a non-Asian script section.
                # (this is a very crude test but quick to compute).
                # This is likely to be quite rare so the speed of the
                # code below is hopefully not a big issue.  The main
                # situation requiring this is that a document title
                # with an english product name in it got cut.


                # we count back and look for
                #  - a space-like character
                #  - reversion to Kanji (which would be a good split point)
                #  - in the worst case, roughly half way back along the line
                limitCheck = (lineStartPos+i)>>1        #(arbitrary taste issue)
                for j in range(i-1,limitCheck,-1):
                    cj = word[j]
                    if category(cj)=='Zs' or ord(cj)>=0x3000:
                        k = j+1
                        if k<i:
                            #NOTE(review): k is the first char after the break;
                            #j is then rebound to k+1 and i to j, so w/c track
                            #word[k] while word[k+1:] returns to the stream.
                            #Looks like a deliberate off-by-one — confirm
                            #before changing.
                            j = k+1
                            extraSpace += sum(widths[j:i])
                            w = widths[k]
                            c = word[k]
                            i = j
                            break

                #end of English-within-Asian special case

            #we are pushing this character back, but
            #the most important of the Japanese typography rules
            #if this character cannot start a line, wrap it up to this line so it hangs
            #in the right margin. We won't do two or more though - that's unlikely and
            #would result in growing ugliness.
            #and increase the extra space
            #bug fix contributed by Alexander Vasilenko <*****@*****.**>
            if c not in ALL_CANNOT_START and i>lineStartPos+1:
                #otherwise we need to push the character back
                #the i>lineStart+1 condition ensures progress
                i -= 1
                extraSpace += w

            #lines.append([maxWidth-sum(widths[lineStartPos:i]), word[lineStartPos:i].strip()])
            lines.append([extraSpace, word[lineStartPos:i].strip()])
            try:
                maxWidth = maxWidths[len(lines)]
            except IndexError:
                maxWidth = maxWidths[-1]  # use the last one
            lineStartPos = i
            widthUsed = 0

    #any characters left?
    if widthUsed > 0:
        lines.append([maxWidth - widthUsed, word[lineStartPos:]])

    return lines
Beispiel #46
0
    def asciiBase85Decode(input):
        """Decodes input using ASCII-Base85 coding.

        This is not normally used - Acrobat Reader decodes for you
        - but a round trip is essential for testing.

        Whitespace is ignored and the stream must end with '~>'.
        Returns latin-1 encoded bytes when the input was unicode,
        otherwise a native str.
        """
        #strip all whitespace
        stripped = ''.join(asNative(input).split())
        #check end
        assert stripped[-2:] == '~>', 'Invalid terminator for Ascii Base 85 Stream'
        stripped = stripped[:-2]  #chop off terminator

        #may have 'z' in it which complicates matters - expand them
        stripped = stripped.replace('z','!!!!!')
        # special rules apply if not a multiple of five bytes.
        whole_word_count, remainder_size = divmod(len(stripped), 5)
        #print '%d words, %d leftover' % (whole_word_count, remainder_size)
        #assert remainder_size != 1, 'invalid Ascii 85 stream!'
        cut = 5 * whole_word_count
        body, lastbit = stripped[0:cut], stripped[cut:]

        out = [].append     #bound append; the list itself is out.__self__
        for i in range(whole_word_count):
            offset = i*5
            c1 = ord(body[offset]) - 33
            c2 = ord(body[offset+1]) - 33
            c3 = ord(body[offset+2]) - 33
            c4 = ord(body[offset+3]) - 33
            c5 = ord(body[offset+4]) - 33

            num = ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85*c4) + c5

            #unpack the 32-bit number back into four bytes
            temp, b4 = divmod(num,256)
            temp, b3 = divmod(temp,256)
            b1, b2 = divmod(temp, 256)

            assert  num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'
            out(chr(b1))
            out(chr(b2))
            out(chr(b3))
            out(chr(b4))

        #decode however many bytes we have as usual
        if remainder_size > 0:
            #pad with '!' (digit value 0) to a full five-character group
            while len(lastbit) < 5:
                lastbit = lastbit + '!'
            c1 = ord(lastbit[0]) - 33
            c2 = ord(lastbit[1]) - 33
            c3 = ord(lastbit[2]) - 33
            c4 = ord(lastbit[3]) - 33
            c5 = ord(lastbit[4]) - 33
            #the table term biases the value so the truncated group rounds
            #correctly -- see the 'last character needs 1 adding' note below
            num = (((85*c1+c2)*85+c3)*85+c4)*85 + (c5
                     +(0,0,0xFFFFFF,0xFFFF,0xFF)[remainder_size])
            temp, b4 = divmod(num,256)
            temp, b3 = divmod(temp,256)
            b1, b2 = divmod(temp, 256)
            assert  num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'
            #print 'decoding: %d %d %d %d %d -> %d -> %d %d %d %d' % (
            #    c1,c2,c3,c4,c5,num,b1,b2,b3,b4)

            #the last character needs 1 adding; the encoding loses
            #data by rounding the number to x bytes, and when
            #divided repeatedly we get one less
            if remainder_size == 2:
                lastword = chr(b1)
            elif remainder_size == 3:
                lastword = chr(b1) + chr(b2)
            elif remainder_size == 4:
                lastword = chr(b1) + chr(b2) + chr(b3)
            else:
                lastword = ''
            out(lastword)

        r = ''.join(out.__self__)
        return r.encode('latin1') if isUnicode(input) else r
Beispiel #47
0
 def _test(self, x):
     """Return True for unicode input, or when every element of *x* is a qrencoder.QR."""
     if isUnicode(x):
         return True
     return all(isinstance(v, qrencoder.QR) for v in x)
Beispiel #48
0
 def __getitem__(self, x):
     """Look up item *x*; string results come back wrapped in FakingStr.

     Any lookup failure yields an empty FakingStr whose str/unicode
     flavour follows that of self.tagName.
     """
     try:
         value = TagWrapper.__getitem__(self, x)
         if isinstance(value, strTypes):
             return FakingStr(value)
         return value
     except:
         #deliberately broad: any failure falls back to an empty wrapper
         return FakingStr(u'' if isUnicode(self.tagName) else '')
Beispiel #49
0
    def asciiBase85Encode(input):
        """Encodes input using ASCII-Base85 coding.

        This is a compact encoding used for binary data within
        a PDF file.  Four bytes of binary data become five bytes of
        ASCII.  This is the default method used for encoding images.

        Accepts bytes or str; the result is always a str ending with
        the Base85 terminator '~>'.
        """
        #if indexing yields 1-char strings (py2, or str input) we must ord() them
        doOrd =  not isPy3 or isUnicode(input)
        # special rules apply if not a multiple of four bytes.
        whole_word_count, remainder_size = divmod(len(input), 4)
        cut = 4 * whole_word_count
        body, lastbit = input[0:cut], input[cut:]

        out = [].append     #bound append; the list itself is out.__self__
        for i in range(whole_word_count):
            offset = i*4
            b1 = body[offset]
            b2 = body[offset+1]
            b3 = body[offset+2]
            b4 = body[offset+3]
            if doOrd:
                b1 = ord(b1)
                b2 = ord(b2)
                b3 = ord(b3)
                b4 = ord(b4)

            #pack the four bytes big-endian into one 32-bit number
            if b1<128:
                num = (((((b1<<8)|b2)<<8)|b3)<<8)|b4
            else:
                num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4

            if num == 0:
                #special case
                out('z')
            else:
                #solve for five base-85 numbers
                temp, c5 = divmod(num, 85)
                temp, c4 = divmod(temp, 85)
                temp, c3 = divmod(temp, 85)
                c1, c2 = divmod(temp, 85)
                assert ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85*c4) + c5 == num, 'dodgy code!'
                out(chr(c1+33))
                out(chr(c2+33))
                out(chr(c3+33))
                out(chr(c4+33))
                out(chr(c5+33))

        # now we do the final bit at the end.  I repeated this separately as
        # the loop above is the time-critical part of a script, whereas this
        # happens only once at the end.

        #encode however many bytes we have as usual
        if remainder_size > 0:
            #zero-pad the short group to a full four bytes
            lastbit += (4-len(lastbit))*('\0' if doOrd else b'\000')
            b1 = lastbit[0]
            b2 = lastbit[1]
            b3 = lastbit[2]
            b4 = lastbit[3]
            if doOrd:
                b1 = ord(b1)
                b2 = ord(b2)
                b3 = ord(b3)
                b4 = ord(b4)

            num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4

            #solve for c1..c5
            temp, c5 = divmod(num, 85)
            temp, c4 = divmod(temp, 85)
            temp, c3 = divmod(temp, 85)
            c1, c2 = divmod(temp, 85)

            #print 'encoding: %d %d %d %d -> %d -> %d %d %d %d %d' % (
            #    b1,b2,b3,b4,num,c1,c2,c3,c4,c5)
            lastword = chr(c1+33) + chr(c2+33) + chr(c3+33) + chr(c4+33) + chr(c5+33)
            #write out most of the bytes - only remainder_size+1 are significant
            out(lastword[0:remainder_size + 1])

        #terminator code for ascii 85
        out('~>')
        return ''.join(out.__self__)
    def drawImage(self,
                  image,
                  filename,
                  x,
                  y,
                  width=None,
                  height=None,
                  mask=None,
                  preserveAspectRatio=False,
                  anchor='c'):
        """Draw an image on the canvas, embedding it once per document.

        The (filename, mask) pair is digested into a stable XObject name,
        so repeated draws of the same image data reuse the registered form
        instead of embedding it again.  Returns the image's intrinsic
        (width, height).
        """
        self._currentPageHasImages = 1

        #imagename, use it
        s = '%s%s' % (filename, mask)
        if isUnicode(s):
            s = s.encode('utf-8')   #the digester needs bytes
        name = _digester(s)

        # in the pdf document, this will be prefixed with something to
        # say it is an XObject.  Does it exist yet?
        regName = self._doc.getXObjectName(name)
        imgObj = self._doc.idToObject.get(regName, None)
        if not imgObj:
            #first time seen, create and register the PDFImageXobject
            imgObj = pdfdoc.PDFImageXObject(name, mask=mask)
            ext = os.path.splitext(filename)[1].lower()
            #JPEG data can be embedded as-is; anything else is re-encoded
            if not (ext in ('.jpg', '.jpeg')
                    and imgObj.loadImageFromJPEG(image)):
                if rl_config.useA85:
                    imgObj.loadImageFromA85(image)
                else:
                    imgObj.loadImageFromRaw(image)

            imgObj.name = name
            self._setXObjects(imgObj)
            self._doc.Reference(imgObj, regName)
            self._doc.addForm(name, imgObj)
            smask = getattr(imgObj, '_smask', None)
            if smask:  #set up the softmask obtained above
                mRegName = self._doc.getXObjectName(smask.name)
                mImgObj = self._doc.idToObject.get(mRegName, None)
                if not mImgObj:
                    self._setXObjects(smask)
                    imgObj.smask = self._doc.Reference(smask, mRegName)
                else:
                    imgObj.smask = pdfdoc.PDFObjectReference(mRegName)
                del imgObj._smask

        # ensure we have a size, as PDF will make it 1x1 pixel otherwise!
        x, y, width, height, scaled = aspectRatioFix(preserveAspectRatio,
                                                     anchor, x, y, width,
                                                     height, imgObj.width,
                                                     imgObj.height)

        # scale and draw
        self.saveState()
        self.translate(x, y)
        self.scale(width, height)
        self._code.append("/%s Do" % regName)
        self.restoreState()

        # track what's been used on this page
        self._formsinuse.append(name)

        return (imgObj.width, imgObj.height)
Beispiel #51
0
def preProcess(tree, nameSpace, caller=None):
    """Expand the parsed tree in the namespace and return a new one.

    Interprets one tag-tuple ``(tagName, attrs, children, extraStuff)``,
    handling the xpreppy logic tags (loop, assign, script, expr, if,
    switch, acquire, doclet).  Returns a single tag-tuple in most cases,
    a list of nodes when processing a loop or expr tag, and None for
    tags that only have side effects on the namespace.

    SECURITY NOTE: this evaluates and executes template content with
    eval/exec, so it must only ever be fed trusted documents.
    """
    from rlextra.radxml import xmlutils
    #expand this into a class with methods for each tag it handles.
    #then I can put logic tags in one and data access in another.
    tagName, attrs, children, extraStuff = tree

    #any attribute written as $name becomes the value of 'name' in the
    #namespace.  Tags might be nested in a loop, and if so then each
    #dictionary must be a fresh copy rather than a pointer to the same
    #dict.
    newAttrs = {} if attrs is None else attrs.copy()
    for key, value in list(newAttrs.items()):
        if isinstance(value, str) and value.startswith('$'):
            newAttrs[key] = eval(value[1:], nameSpace)
    attrs = newAttrs

    if tagName in TAG_LOOP:
        innerTxt = attrs[TAG_LOOP_INNER]
        outer = eval(attrs[TAG_LOOP_OUTER], nameSpace)
        dataSet = []
        for row in outer:
            #bind the loop variable by assignment inside the namespace;
            #a plain dict store would miss dotted/subscripted targets
            nameSpace['__loop_inner__'] = row
            rl_exec((innerTxt + " = __loop_inner__\n"), nameSpace)
            #at this point we're making lots of child nodes.
            #the attribute dictionary of each should be a copy, not
            #a reference
            newChildren = processChildren(children, nameSpace)
            if newChildren is not None:
                dataSet.extend(newChildren)
        return dataSet

    elif tagName in TAG_ASSIGN:
        name = attrs[TAG_ASSIGN_NAME]
        valueStr = attrs[TAG_ASSIGN_VALUE]
        try:
            #evaluate in the document namespace, consistent with every
            #other eval in this function (previously evaluated in this
            #module's globals, so assignments could not refer to
            #document variables)
            value = eval(valueStr, nameSpace)
        except SyntaxError:  #must be a string
            value = valueStr
        nameSpace[name] = value
        return None

    elif tagName in TAG_SCRIPT:
        #the script body is the first (text) child
        code = children[0]
        if not code.endswith('\n'):
            code += '\n'
        try:
            rl_exec(code, nameSpace)
        except SyntaxError:
            raise SyntaxError("Error with following script in xpreppy:\n\n%s" % code)
        return None

    elif tagName in TAG_EXPR:
        exprText = children[0]
        assert isinstance(exprText, strTypes), "expr can only contain strings"

        #attributes may affect escaping
        escape = attrs.get(u'escape', None)
        encoding = attrs.get(u'encoding', u'utf8')

        #normalise the result to unicode before any escaping
        exprValue = eval(exprText, nameSpace)
        if isBytes(exprValue):
            exprValue = exprValue.decode(encoding)
        elif isUnicode(exprValue):
            pass
        else:
            exprValue = asUnicodeEx(exprValue)

        if escape in (u'CDATA', u'CDATAESCAPE'):
            exprValue = u'<![CDATA[%s]]>' % exprValue
            #plain CDATA is returned raw; CDATAESCAPE falls through and
            #gets XML-escaped below
            if escape == u'CDATA':
                return [exprValue]
        elif escape == u'off':
            return [asUnicodeEx(exprValue)]
        elif escape == u'unescape':
            return [xmlutils.unescape(exprValue, ENTITY_SUBSTITUTIONS_DRAWSTRING_DICT)]
        return [xmlEscape(exprValue)]

    elif tagName in TAG_IF:
        condText = attrs[u'cond']
        yesOrNo = eval(condText, nameSpace)
        if yesOrNo:
            return processChildren(children, nameSpace)
        #a false condition produces no output
        return None

    elif tagName in TAG_SWITCH:
        #two modes: with a top-level expression each case is compared
        #against its value; without one each case condition is a full
        #boolean expression
        exprText = attrs.get(u'expr', u'')
        if exprText:
            expr = eval(exprText, nameSpace)

        selected = None
        for child in children:
            if isinstance(child, tuple):
                (childTagName, childAttrs, grandChildren, stuff) = child
                if childTagName in TAG_CASE:
                    condition = childAttrs[u'condition']
                    if exprText:
                        #check if it equals the switch value
                        try:
                            value = eval(condition, nameSpace)
                        except NameError:
                            value = condition  #assume a string
                        if expr == value:
                            selected = processChildren(grandChildren, nameSpace)
                            break
                    else:
                        #they gave us a full condition, evaluate it
                        yes = eval(condition, nameSpace)
                        if yes:
                            selected = processChildren(grandChildren, nameSpace)
                            break
                elif childTagName in TAG_DEFAULT:
                    selected = processChildren(grandChildren, nameSpace)
                    break
                else:
                    #fixed: the original single-%s format with a 2-tuple
                    #raised TypeError instead of this ValueError
                    raise ValueError('%s tag may only contain these tags: %s' % (
                        TAG_SWITCH, ', '.join(TAG_CASE + TAG_DEFAULT)))

        return selected

    elif tagName in TAG_ACQUIRE:
        #all children will be data fetchers
        xacquire.acquireData(children, nameSpace)
        return None

    elif tagName in TAG_DOCLET:
        #pull out args needed to initialize
        dirName = attrs.get(u"baseDir", None)
        moduleName = attrs[u"module"]
        className = attrs[u"class"]
        dataStr = attrs.get(u"data", None)

        #load module, import and create it; RML callers get the RML
        #search path, others just use the supplied directory
        if caller == 'rml':
            from rlextra.rml2pdf.rml2pdf import _rml2pdf_locations
            locations = _rml2pdf_locations(dirName)
        else:
            locations = dirName
        m = recursiveImport(moduleName, locations)
        klass = getattr(m, className)
        docletObj = klass()

        #give it the data model: either an expression evaluated in the
        #namespace, or the whole namespace by default
        dataObj = eval(dataStr, nameSpace) if dataStr else nameSpace
        docletObj.setData(dataObj)

        #hide it in the tree so RML can see the object
        attrs[u'__doclet__'] = docletObj

        #return the tag otherwise unmodified
        return (tagName, attrs, children, extraStuff)

    else:
        #not a logic tag: recurse into children and rebuild the node
        newChildren = processChildren(children, nameSpace)
        return (tagName, attrs, newChildren, extraStuff)