Ejemplo n.º 1
0
    def stringWidth(self, text, size, encoding=None):
        """Return the width of text at the given font size.

        Bytes are decoded as UTF-8 first so that widths are summed per
        character rather than per byte.  Each character's width is looked
        up in self.unicodeWidths (1000 when missing) and the total is
        scaled by size * 0.001.  The encoding argument is not used.
        """
        chars = text.decode('utf8') if isBytes(text) else text
        lookup = self.unicodeWidths.get
        total = sum(lookup(ch, 1000) for ch in chars)
        return size * 0.001 * total
    def stringWidth(self, text, size, encoding=None):
        """Measure text in characters, not bytes, and scale by size.

        Bytes input is decoded as UTF-8.  Per-character widths come from
        self.unicodeWidths, with 1000 used for characters not present in
        it; the sum is multiplied by size * 0.001.  encoding is ignored.
        """
        if isBytes(text):
            text = text.decode('utf8')

        perChar = self.unicodeWidths
        units = 0
        for ch in text:
            units += perChar.get(ch, 1000)
        return size * 0.001 * units
Ejemplo n.º 3
0
 def inner(*args, **kwds):
     """Call f, transcoding the positional text argument when it is bytes.

     tx (index of the text argument), enc (codec name) and f (the wrapped
     callable) are taken from the enclosing scope, which is not shown here.
     When args[tx] is bytes it is decoded with enc, f is called with the
     decoded text in its place, and f's result is encoded back with enc.
     Otherwise f is called with the arguments unchanged.
     """
     text = args[tx]
     # Test the argument itself; testing the index tx could never be true.
     if isBytes(text):
         # Replace the text argument in place rather than inserting a copy
         # next to the original bytes.
         args = list(args)
         args[tx] = text.decode(enc)
         return f(*args, **kwds).encode(enc)
     return f(*args, **kwds)
 def formatForPdf(self, text):
     """Return text encoded as UTF-16BE (no BOM) and passed through escapePDF.

     Bytes input is decoded as UTF-8 before being re-encoded.
     """
     from codecs import utf_16_be_encode
     if isBytes(text):
         text = text.decode('utf8')
     # utf_16_be_encode returns (encoded bytes, characters consumed)
     utf16, _consumed = utf_16_be_encode(text)
     return escapePDF(utf16)
Ejemplo n.º 5
0
 def formatForPdf(self, text):
     """Encode text for PDF output: UTF-16 big-endian without a BOM, escaped.

     UTF-8 bytes are accepted and decoded first; the encoded result is
     handed to escapePDF and returned.
     """
     from codecs import utf_16_be_encode
     unicodeText = text.decode('utf8') if isBytes(text) else text
     # only the encoded bytes are needed, not the consumed-length count
     rawUtf16 = utf_16_be_encode(unicodeText)[0]
     return escapePDF(rawUtf16)
Ejemplo n.º 6
0
 def write(self,u):
     """Append u to this writer as text.

     u may be UTF-8 encoded bytes (decoded before appending) or a unicode
     string (appended as is).

     Raises ValueError when bytes are not valid UTF-8 or when u is neither
     bytes nor unicode.
     """
     if isBytes(u):
         try:
             u = u.decode('utf-8')
         except UnicodeDecodeError as e:
             # Catch only the decode failure; a bare except would also
             # swallow KeyboardInterrupt/SystemExit.
             raise ValueError("String %r not encoded as 'utf-8'\nerror=%s" % (u,e))
     elif not isUnicode(u):
         raise ValueError("EncodedWriter.write(%s) argument should be 'utf-8' bytes or str" % ascii(u))
     self.append(u)
Ejemplo n.º 7
0
def text2PathDescription(text,
                         x=0,
                         y=0,
                         fontName=_baseGFontName,
                         fontSize=1000,
                         anchor='start',
                         truncate=1,
                         pathReverse=0,
                         gs=None):
    """Return the list of path operations that draw text in the given font.

    The text starts at (x, y); anchor 'end' or 'middle' shifts x left by
    the full or half string width.  Glyph outlines come from
    gs._stringPath and are each converted by processGlyph(truncate=...,
    pathReverse=...).  When gs is None a _renderPM gstate(1, 1) is created.

    Raises ValueError for multi byte fonts that are not dynamic fonts, and
    UnicodeDecodeError (with the offending bytes shown in context) when
    bytes text is not valid UTF-8.
    """
    font = getFont(fontName)
    if font._multiByte and not font._dynamicFont:
        raise ValueError(
            "text2PathDescription doesn't support multi byte fonts like %r" %
            fontName)
    path = []
    if anchor != 'start':
        width = stringWidth(text, fontName, fontSize)
        if anchor == 'end':
            x = x - width
        elif anchor == 'middle':
            x = x - width / 2.
    if gs is None:
        from ._renderPM import gstate
        gs = gstate(1, 1)
    setFont(gs, fontName, fontSize)
    if font._dynamicFont:
        for glyph in gs._stringPath(text, x, y):
            path.extend(
                processGlyph(glyph, truncate=truncate,
                             pathReverse=pathReverse))
        return path

    if isBytes(text):
        try:
            text = text.decode('utf8')
        except UnicodeDecodeError as e:
            # re-raise with up to 10 bytes of context either side of the
            # undecodable span
            start, end = e.args[2:4]
            context = '%s\n%s-->%s<--%s' % (
                e.args[4], text[max(start - 10, 0):start],
                text[start:end], text[end:end + 10])
            raise UnicodeDecodeError(*(e.args[:4] + (context, )))
    runs = unicode2T1(text, [font] + font.substitutionFonts)
    lastIndex = len(runs) - 1
    current = font
    for index, (runFont, runText) in enumerate(runs):
        if runFont != current:
            # switch the graphics state to the substitution font
            setFont(gs, runFont.fontName, fontSize)
            current = runFont
        for glyph in gs._stringPath(runText, x, y):
            path.extend(
                processGlyph(glyph, truncate=truncate,
                             pathReverse=pathReverse))
        if index != lastIndex:
            # advance past this run before drawing the next one
            x += runFont.stringWidth(runText.decode(runFont.encName),
                                     fontSize)
    return path
 def write(self,u):
     """Append u, which must be UTF-8 bytes or a unicode string.

     Bytes are decoded as UTF-8 before self.append is called; unicode is
     appended unchanged.

     Raises ValueError when the bytes cannot be decoded as UTF-8 or when u
     is neither bytes nor unicode.
     """
     if isBytes(u):
         try:
             u = u.decode('utf-8')
         except UnicodeDecodeError as e:
             # Only decode failures are translated; a bare except would
             # also trap KeyboardInterrupt/SystemExit.
             raise ValueError("String %r not encoded as 'utf-8'\nerror=%s" % (u,e))
     elif not isUnicode(u):
         raise ValueError("EncodedWriter.write(%s) argument should be 'utf-8' bytes or str" % ascii(u))
     self.append(u)
Ejemplo n.º 9
0
    def _formatText(self, text):
        """Generate the PDF text output operator(s) for text.

        Returns one space-joined string of "Tf ... TL" font selections and
        "(...) Tj" show operators.  Dynamic fonts are split into subsets,
        multi byte fonts format the text themselves, and other fonts are
        converted with unicode2T1, switching to substitution fonts as
        needed and back to the base font at the end.
        """
        if log2vis and self.direction in ('LTR', 'RTL'):
            # Use pyfribidi to write the text in the correct visual order.
            baseDirection = directionsMap.get(self.direction, DIR_ON)
            text = log2vis(text, baseDirection, clean=True)
        canv = self._canvas
        font = pdfmetrics.getFont(self._fontname)

        def fontOp(internalName):
            # font selection operator at the current size and leading
            return "%s %s Tf %s TL" % (internalName, fp_str(
                self._fontsize), fp_str(self._leading))

        ops = []
        if font._dynamicFont:
            # dynamic font: emit one Tj per subset chunk, selecting the
            # subset font whenever it changes
            for subset, chunk in font.splitString(text, canv._doc):
                if subset != self._curSubset:
                    ops.append(
                        fontOp(font.getSubsetInternalName(subset, canv._doc)))
                    self._curSubset = subset
                ops.append("(%s) Tj" % canv._escape(chunk))
        elif font._multiByte:
            # the font itself knows how to format text for PDF
            ops.append(fontOp(canv._doc.getInternalFontName(font.fontName)))
            ops.append("(%s) Tj" % font.formatForPdf(text))
        else:
            # convert to T1 coding
            if isBytes(text):
                try:
                    text = text.decode('utf8')
                except UnicodeDecodeError as e:
                    # re-raise showing the undecodable bytes in context
                    start, end = e.args[2:4]
                    detail = '%s\n%s-->%s<--%s' % (
                        e.args[4], text[max(start - 10, 0):start],
                        text[start:end], text[end:end + 10])
                    raise UnicodeDecodeError(*(e.args[:4] + (detail, )))

            current = font
            for f, t in pdfmetrics.unicode2T1(text,
                                              [font] + font.substitutionFonts):
                if f != current:
                    ops.append(fontOp(canv._doc.getInternalFontName(f.fontName)))
                    current = f
                ops.append("(%s) Tj" % canv._escape(t))
            if font != current:
                # finish back on the font the text object was using
                ops.append(
                    fontOp(canv._doc.getInternalFontName(self._fontname)))
        return ' '.join(ops)
    def __init__(self, fileNameOrContent):
        '''Initialise from PDF bytes content or from a file name.

        Bytes whose first 20 bytes contain b"%PDF" are parsed directly with
        parseContent; anything else is handed to open as a file name.
        '''
        self.cleanText = {}
        self._annotations = None
        self.context = None
        self.rawContent = None

        # answers from parsing referenced streams are kept here so they do
        # not have to be parsed several times
        self._xobjectStreams = {}

        # a bytes value which looks like PDF is assumed to be literal content
        looksLikePdf = (isBytes(fileNameOrContent)
                        and b"%PDF" in fileNameOrContent[0:20])
        if looksLikePdf:
            self.parseContent(fileNameOrContent)
            return
        self.open(fileNameOrContent)
Ejemplo n.º 11
0
    def _formatText(self, text):
        """Generate the PDF text output operator(s) for text.

        The result is a single string of "Tf ... TL" and "(...) Tj"
        operators joined by spaces.  The branch taken depends on the
        current font: dynamic (subset based), multi byte, or a font
        converted through unicode2T1 with its substitution fonts.
        """
        if log2vis and self.direction in ('LTR','RTL'):
            # Use pyfribidi to write the text in the correct visual order.
            visualDir = directionsMap.get(self.direction.upper(),DIR_ON)
            text = log2vis(text, visualDir, clean=True)
        canv = self._canvas
        font = pdfmetrics.getFont(self._fontname)
        fontOpFmt = "%s %s Tf %s TL"
        showFmt = "(%s) Tj"
        out = []
        emit = out.append
        if font._dynamicFont:
            # dynamic font: one show operator per subset chunk
            for subset, piece in font.splitString(text, canv._doc):
                if subset!=self._curSubset:
                    subsetName = font.getSubsetInternalName(subset, canv._doc)
                    emit(fontOpFmt % (subsetName, fp_str(self._fontsize), fp_str(self._leading)))
                    self._curSubset = subset
                emit(showFmt % canv._escape(piece))
        elif font._multiByte:
            # the font formats the text for PDF itself
            emit(fontOpFmt % (
                canv._doc.getInternalFontName(font.fontName),
                fp_str(self._fontsize),
                fp_str(self._leading)
                ))
            emit(showFmt % font.formatForPdf(text))
        else:
            # convert to T1 coding
            if isBytes(text):
                try:
                    text = text.decode('utf8')
                except UnicodeDecodeError as e:
                    # add the offending bytes, with context, to the reason
                    lo, hi = e.args[2:4]
                    reason = '%s\n%s-->%s<--%s' % (e.args[4],text[max(lo-10,0):lo],text[lo:hi],text[hi:hi+10])
                    raise UnicodeDecodeError(*(e.args[:4]+(reason,)))

            active = font
            for f, t in pdfmetrics.unicode2T1(text,[font]+font.substitutionFonts):
                if f!=active:
                    emit(fontOpFmt % (canv._doc.getInternalFontName(f.fontName), fp_str(self._fontsize), fp_str(self._leading)))
                    active = f
                emit(showFmt % canv._escape(t))
            if font!=active:
                # switch back to the text object's own font
                emit(fontOpFmt % (canv._doc.getInternalFontName(self._fontname), fp_str(self._fontsize), fp_str(self._leading)))
        return ' '.join(out)
Ejemplo n.º 12
0
def tt2xml(tt):
    '''Convert tuple tree form to unicode xml.

    tt may be None (gives ''), bytes (decoded as UTF-8 and escaped),
    a unicode string (escaped), a list of such values (converted and
    concatenated) or a tuple whose first three items are the tag name,
    an attribute mapping (or a false value) and the content (None for an
    empty element).

    Tag names, attribute names and attribute values are decoded from UTF-8
    only when they are bytes, so trees holding native strings work too.

    Raises ValueError for any other kind of value.
    '''
    if tt is None: return ''
    if isBytes(tt):
        return tt2xml(tt.decode('utf8'))
    if isUnicode(tt):
        return escape(tt)
    if isinstance(tt,list):
        return ''.join(tt2xml(x) for x in tt)
    if isinstance(tt,tuple):
        def _text(v):
            # unicode strings have no usable decode(); only bytes need it
            return v.decode('utf8') if isBytes(v) else v
        tag = _text(tt[0])
        parts = ['<'+tag]
        attrs = tt[1]
        content = tt[2]
        if attrs:
            for k,v in attrs.items():
                parts.append(' %s=%s' % (_text(k),quoteattr(_text(v))))
        if content is not None:
            parts.append('>')
            parts.append(tt2xml(content))
            parts.append('</'+tag+'>')
        else:
            parts.append('/>')
        return ''.join(parts)
    raise ValueError('Invalid value %r passed to tt2xml' % tt)
Ejemplo n.º 13
0
 def normalize(self, x):
     """Return x as an int, decoding it from UTF-8 first when it is bytes."""
     if isBytes(x):
         x = x.decode('utf8')
     return int(x)
Ejemplo n.º 14
0
def _re_match(pat,text,flags=re.M|re.I):
    """Apply re.match(pat, text, flags), matching the pattern type to text.

    When running on Python 3 and text is bytes, pat is encoded as latin1
    so that pattern and subject are both bytes.
    """
    needsBytesPattern = isPy3 and isBytes(text)
    pattern = pat.encode('latin1') if needsBytesPattern else pat
    return re.match(pattern,text,flags)
Ejemplo n.º 15
0
def preProcess(tree, nameSpace, caller=None):
    """Expands the parsed tree in the namespace and return new one.
    Returns a single tag-tuple in most cases, but a list of them
    if processing a loop node.

    Arguments:
        tree: a (tagName, attrs, children, extraStuff) tuple.
        nameSpace: dictionary used as the globals of every eval/rl_exec
            call made here; loop, assign and script tags modify it.
        caller: when equal to 'rml', doclet modules are searched for in
            the locations given by rml2pdf's _rml2pdf_locations(baseDir);
            otherwise the baseDir attribute itself is used.

    Returns None for assign, script and acquire tags, for an if tag whose
    condition is false and for a switch tag in which no case is selected.

    Raises ValueError when a switch tag contains a child tag that is
    neither a case nor a default tag.

    NOTE(review): attribute values starting with '$', loop/assign/if/switch
    expressions and script bodies are executed as Python code, so the tree
    must come from a trusted source.
    """
    from reportPackages.rlextra.radxml import xmlutils
    #expand this into a class with methods for each tag it handles.
    #then I can put logic tags in one and data access in another.
    tagName, attrs, children, extraStuff = tree

    #any attribute as $name becomes the value of name
    #tags might be nested in a loop, and if so then
    #each dictionary must be a fresh copy rather than
    # a pointer to the same dict

    newAttrs = attrs.copy() if attrs is not None else {}
    for key, value in list(newAttrs.items()):
        if isinstance(value,str) and value[0:1] == '$':
            newValue = eval(value[1:], nameSpace)
            newAttrs[key] = newValue
    attrs = newAttrs

    if tagName in TAG_LOOP:
        innerTxt = attrs[TAG_LOOP_INNER]
        outer = eval(attrs[TAG_LOOP_OUTER], nameSpace)
        dataSet = []
        for row in outer:
            #bind the loop variable(s) by executing "<inner> = row"
            nameSpace['__loop_inner__'] = row
            rl_exec((innerTxt + " = __loop_inner__\n"), nameSpace)
            #at this point we're making lots of child nodes.
            # the attribute dictionary of each should be a copy, not
            # a reference
            newChildren = processChildren(children, nameSpace)
            if newChildren is not None:
                dataSet.extend(newChildren)
        return dataSet

    elif tagName in TAG_ASSIGN:
        name = attrs[TAG_ASSIGN_NAME]
        valueStr = attrs[TAG_ASSIGN_VALUE]
        try:
            value = eval(valueStr, nameSpace)
        except SyntaxError:  #must be a string
            value = valueStr
        nameSpace[name] = value
        return None

    elif tagName in TAG_SCRIPT:
        code = children[0]
        if not code.endswith('\n'): code += '\n'
        try:
            rl_exec(code, nameSpace)
        except SyntaxError:
            raise SyntaxError("Error with following script in xpreppy:\n\n%s" % code)
        return None

    elif tagName in TAG_EXPR:
        exprText = children[0]
        assert isinstance(exprText,strTypes), "expr can only contain strings"

        #attributes may affect escaping
        escape = attrs.get(u'escape', None)
        encoding = attrs.get(u'encoding',u'utf8')

        #the value is always turned into unicode before escaping
        exprValue = eval(exprText, nameSpace)
        if isBytes(exprValue):
            exprValue = exprValue.decode(encoding)
        elif isUnicode(exprValue):
            pass
        else:
            exprValue = asUnicodeEx(exprValue)

        if escape in (u'CDATA',u'CDATAESCAPE'):
            exprValue = u'<![CDATA[%s]]>' % exprValue
            #CDATAESCAPE falls through to the xmlEscape at the end
            if escape==u'CDATA': return [exprValue]
        elif escape == u'off':
            return [asUnicodeEx(exprValue)]
        elif escape == u'unescape':
            return [xmlutils.unescape(exprValue, ENTITY_SUBSTITUTIONS_DRAWSTRING_DICT)]
        return [xmlEscape(exprValue)]

    elif tagName in TAG_IF:
        condText = attrs[u'cond']
        yesOrNo = eval(condText, nameSpace)
        if yesOrNo:
            return processChildren(children, nameSpace)
        #a false condition contributes nothing
        return None

    elif tagName in TAG_SWITCH:
        #two modes, with and without top level variable
        exprText = attrs.get(u'expr',u'')

        if exprText:
            expr = eval(exprText, nameSpace)

        selected = None
        for child in children:
            if isinstance(child,tuple):
                (childTagName, childAttrs, grandChildren, stuff) = child
                if childTagName in TAG_CASE:
                    condition = childAttrs[u'condition']
                    if exprText:
                        #check if it equals the value
                        try:
                            value = eval(condition, nameSpace)
                        except NameError:
                            value = condition # assume a string
                        if (expr == value):
                            selected = processChildren(grandChildren, nameSpace)
                            break
                    else:
                        #they gave us a full condition, evaluate it
                        yes = eval(condition, nameSpace)
                        if yes:
                            selected = processChildren(grandChildren, nameSpace)
                            break
                elif childTagName in TAG_DEFAULT:
                    selected = processChildren(grandChildren, nameSpace)
                    break
                else:
                    #two placeholders for two values: the tag and the allowed children
                    raise ValueError('%s tag may only contain these tags: %s' % (tagName, ', '.join(TAG_CASE+TAG_DEFAULT)))

        return selected

    elif tagName in TAG_ACQUIRE:
        #all children will be data fetchers
        xacquire.acquireData(children, nameSpace)
        return None

    elif tagName in TAG_DOCLET:
        #pull out args needed to initialize
        dirName = attrs.get(u"baseDir", None)
        moduleName = attrs[u"module"]
        className = attrs[u"class"]
        dataStr = attrs.get(u"data", None)

        #load module, import and create it
        if caller == 'rml':
            from reportPackages.rlextra.rml2pdf.rml2pdf import _rml2pdf_locations
            locations = _rml2pdf_locations(dirName)
        else:
            locations = dirName
        m = recursiveImport(moduleName, locations)
        klass = getattr(m, className)
        docletObj = klass()

        #give it the data model
        if dataStr:
            dataObj = eval(dataStr, nameSpace)
        else:
            dataObj = nameSpace

        docletObj.setData(dataObj)

        #hide it in the tree so RML can see the object
        attrs[u'__doclet__'] = docletObj

        #return the tag otherwise unmodified
        return (tagName, attrs, children, extraStuff)

    else:
        newChildren = processChildren(children, nameSpace)
        return (tagName, attrs, newChildren, extraStuff)
Ejemplo n.º 16
0
 def normalize(self,x):
     """Convert x to int; bytes are decoded as UTF-8 before conversion."""
     value = x
     if isBytes(value):
         value = value.decode('utf8')
     return int(value)
Ejemplo n.º 17
0
def _re_match(pat,text,flags=re.M|re.I):
    """Return re.match(pat, text, flags), encoding pat for bytes subjects.

    When text is bytes the pattern is encoded as latin1 first so that the
    pattern and the subject have the same type.
    """
    if not isBytes(text):
        return re.match(pat,text,flags)
    return re.match(pat.encode('latin1'),text,flags)
Ejemplo n.º 18
0
def preProcess(tree, nameSpace, caller=None):
    """Expands the parsed tree in the namespace and return new one.
    Returns a single tag-tuple in most cases, but a list of them
    if processing a loop node.

    Arguments:
        tree: a (tagName, attrs, children, extraStuff) tuple.
        nameSpace: dictionary used as the globals of every eval/rl_exec
            call made here; loop, assign and script tags modify it.
        caller: when equal to 'rml', doclet modules are searched for in
            the locations given by rml2pdf's _rml2pdf_locations(baseDir);
            otherwise the baseDir attribute itself is used.

    Returns None for assign, script and acquire tags, for an if tag whose
    condition is false and for a switch tag in which no case is selected.

    Raises ValueError when a switch tag contains a child tag that is
    neither a case nor a default tag.

    NOTE(review): attribute values starting with '$', loop/assign/if/switch
    expressions and script bodies are executed as Python code, so the tree
    must come from a trusted source.
    """
    from rlextra.radxml import xmlutils
    #expand this into a class with methods for each tag it handles.
    #then I can put logic tags in one and data access in another.
    tagName, attrs, children, extraStuff = tree

    #any attribute as $name becomes the value of name
    #tags might be nested in a loop, and if so then
    #each dictionary must be a fresh copy rather than
    # a pointer to the same dict

    newAttrs = attrs.copy() if attrs is not None else {}
    for key, value in list(newAttrs.items()):
        if isinstance(value,str) and value[0:1] == '$':
            newValue = eval(value[1:], nameSpace)
            newAttrs[key] = newValue
    attrs = newAttrs

    if tagName in TAG_LOOP:
        innerTxt = attrs[TAG_LOOP_INNER]
        outer = eval(attrs[TAG_LOOP_OUTER], nameSpace)
        dataSet = []
        for row in outer:
            #bind the loop variable(s) by executing "<inner> = row"
            nameSpace['__loop_inner__'] = row
            rl_exec((innerTxt + " = __loop_inner__\n"), nameSpace)
            #at this point we're making lots of child nodes.
            # the attribute dictionary of each should be a copy, not
            # a reference
            newChildren = processChildren(children, nameSpace)
            if newChildren is not None:
                dataSet.extend(newChildren)
        return dataSet

    elif tagName in TAG_ASSIGN:
        name = attrs[TAG_ASSIGN_NAME]
        valueStr = attrs[TAG_ASSIGN_VALUE]
        try:
            #evaluate in the template namespace, like every other
            #expression here, so assigned values can use template names
            value = eval(valueStr, nameSpace)
        except SyntaxError:  #must be a string
            value = valueStr
        nameSpace[name] = value
        return None

    elif tagName in TAG_SCRIPT:
        code = children[0]
        if not code.endswith('\n'): code += '\n'
        try:
            rl_exec(code, nameSpace)
        except SyntaxError:
            raise SyntaxError("Error with following script in xpreppy:\n\n%s" % code)
        return None

    elif tagName in TAG_EXPR:
        exprText = children[0]
        assert isinstance(exprText,strTypes), "expr can only contain strings"

        #attributes may affect escaping
        escape = attrs.get(u'escape', None)
        encoding = attrs.get(u'encoding',u'utf8')

        #the value is always turned into unicode before escaping
        exprValue = eval(exprText, nameSpace)
        if isBytes(exprValue):
            exprValue = exprValue.decode(encoding)
        elif isUnicode(exprValue):
            pass
        else:
            exprValue = asUnicodeEx(exprValue)

        if escape in (u'CDATA',u'CDATAESCAPE'):
            exprValue = u'<![CDATA[%s]]>' % exprValue
            #CDATAESCAPE falls through to the xmlEscape at the end
            if escape==u'CDATA': return [exprValue]
        elif escape == u'off':
            return [asUnicodeEx(exprValue)]
        elif escape == u'unescape':
            return [xmlutils.unescape(exprValue, ENTITY_SUBSTITUTIONS_DRAWSTRING_DICT)]
        return [xmlEscape(exprValue)]

    elif tagName in TAG_IF:
        condText = attrs[u'cond']
        yesOrNo = eval(condText, nameSpace)
        if yesOrNo:
            return processChildren(children, nameSpace)
        #a false condition contributes nothing
        return None

    elif tagName in TAG_SWITCH:
        #two modes, with and without top level variable
        exprText = attrs.get(u'expr',u'')

        if exprText:
            expr = eval(exprText, nameSpace)

        selected = None
        for child in children:
            if isinstance(child,tuple):
                (childTagName, childAttrs, grandChildren, stuff) = child
                if childTagName in TAG_CASE:
                    condition = childAttrs[u'condition']
                    if exprText:
                        #check if it equals the value
                        try:
                            value = eval(condition, nameSpace)
                        except NameError:
                            value = condition # assume a string
                        if (expr == value):
                            selected = processChildren(grandChildren, nameSpace)
                            break
                    else:
                        #they gave us a full condition, evaluate it
                        yes = eval(condition, nameSpace)
                        if yes:
                            selected = processChildren(grandChildren, nameSpace)
                            break
                elif childTagName in TAG_DEFAULT:
                    selected = processChildren(grandChildren, nameSpace)
                    break
                else:
                    #two placeholders for two values: the tag and the allowed children
                    raise ValueError('%s tag may only contain these tags: %s' % (tagName, ', '.join(TAG_CASE+TAG_DEFAULT)))

        return selected

    elif tagName in TAG_ACQUIRE:
        #all children will be data fetchers
        xacquire.acquireData(children, nameSpace)
        return None

    elif tagName in TAG_DOCLET:
        #pull out args needed to initialize
        dirName = attrs.get(u"baseDir", None)
        moduleName = attrs[u"module"]
        className = attrs[u"class"]
        dataStr = attrs.get(u"data", None)

        #load module, import and create it
        if caller == 'rml':
            from rlextra.rml2pdf.rml2pdf import _rml2pdf_locations
            locations = _rml2pdf_locations(dirName)
        else:
            locations = dirName
        m = recursiveImport(moduleName, locations)
        klass = getattr(m, className)
        docletObj = klass()

        #give it the data model
        if dataStr:
            dataObj = eval(dataStr, nameSpace)
        else:
            dataObj = nameSpace

        docletObj.setData(dataObj)

        #hide it in the tree so RML can see the object
        attrs[u'__doclet__'] = docletObj

        #return the tag otherwise unmodified
        return (tagName, attrs, children, extraStuff)

    else:
        newChildren = processChildren(children, nameSpace)
        return (tagName, attrs, newChildren, extraStuff)