def stringWidth(self, text, size, encoding=None):
    """Return the width of *text* when set at point size *size*.

    Bytes input is decoded as UTF-8 first so the measurement is made per
    character rather than per byte.  Each character's width is looked up in
    ``self.unicodeWidths``; characters missing from that mapping count as
    1000 units.  The summed units are scaled by ``size * 0.001``.

    The ``encoding`` argument is accepted but not used by this method.
    """
    if isBytes(text):
        text = text.decode('utf8')
    widthOf = self.unicodeWidths.get
    units = 0
    for uch in text:
        units += widthOf(uch, 1000)
    return size * 0.001 * units
def inner(*args, **kwds):
    """Call ``f`` making sure its text argument is unicode.

    The positional argument at index ``tx`` is treated as the text.  When it
    is bytes it is decoded with ``enc``, ``f`` is called with the decoded
    value in its place, and the result of ``f`` is encoded back with ``enc``
    so that bytes in gives bytes out.  Otherwise ``f`` is called with the
    arguments untouched and its result is returned as is.

    ``f``, ``tx`` and ``enc`` are not defined in this function; presumably
    they are supplied by the enclosing wrapper's scope.
    """
    text = args[tx]
    if isBytes(text):
        # Replace (rather than insert next to) the original argument so the
        # positions of the remaining arguments are preserved; assigning by
        # index also keeps negative values of tx working.
        patched = list(args)
        patched[tx] = text.decode(enc)
        return f(*patched, **kwds).encode(enc)
    return f(*args, **kwds)
def formatForPdf(self, text):
    """Return *text* encoded as big-endian UTF-16 (no BOM) and PDF-escaped.

    Bytes input is decoded as UTF-8 before being re-encoded.  The encoded
    bytes are passed through ``escapePDF`` and that result is returned.
    """
    from codecs import utf_16_be_encode
    if isBytes(text):
        text = text.decode('utf8')
    # utf_16_be_encode returns (encoded, length consumed); only the encoded
    # part is needed here.
    utf16, _consumed = utf_16_be_encode(text)
    return escapePDF(utf16)
def write(self, u):
    """Append *u* to this writer as unicode text.

    *u* may be UTF-8 encoded bytes, which are decoded first, or a unicode
    string, which is appended unchanged.

    Raises:
        ValueError: if *u* is bytes that are not valid UTF-8, or if it is
            neither bytes nor unicode.
    """
    if isBytes(u):
        try:
            u = u.decode('utf-8')
        except UnicodeDecodeError as e:
            # Only a decoding failure means "not utf-8"; anything else
            # (e.g. KeyboardInterrupt) must propagate untouched.
            raise ValueError("String %r not encoded as 'utf-8'\nerror=%s" % (u, str(e)))
    elif not isUnicode(u):
        raise ValueError("EncodedWriter.write(%s) argument should be 'utf-8' bytes or str" % ascii(u))
    self.append(u)
def text2PathDescription(text, x=0, y=0, fontName=_baseGFontName, fontSize=1000,
                         anchor='start', truncate=1, pathReverse=0, gs=None):
    """Return a list of path operations that draw *text* as glyph outlines.

    text        the string to outline; bytes are decoded as UTF-8 when the
                font is not a dynamic font
    x, y        starting position; x is shifted left by the full or half
                string width when anchor is 'end' or 'middle'
    fontName    name of a registered font
    fontSize    size passed to the graphics state and width calculations
    anchor      'start', 'end' or 'middle'
    truncate, pathReverse
                passed straight through to processGlyph for every glyph
    gs          graphics state to use; when None a 1x1 _renderPM gstate is
                created

    Raises ValueError for multi byte fonts that are not dynamic fonts, and
    UnicodeDecodeError (with some context around the offending bytes added
    to the reason) when bytes input is not valid UTF-8.
    """
    font = getFont(fontName)
    if font._multiByte and not font._dynamicFont:
        raise ValueError(
            "text2PathDescription doesn't support multi byte fonts like %r" % fontName)

    if anchor != 'start':
        textLen = stringWidth(text, fontName, fontSize)
        if anchor == 'end':
            x = x - textLen
        elif anchor == 'middle':
            x = x - textLen / 2.

    if gs is None:
        from ._renderPM import gstate
        gs = gstate(1, 1)
    setFont(gs, fontName, fontSize)

    path = []

    def addGlyphs(s, x0):
        # Append the processed outline of every glyph of s, drawn from x0.
        for glyph in gs._stringPath(s, x0, y):
            path.extend(
                processGlyph(glyph, truncate=truncate, pathReverse=pathReverse))

    if font._dynamicFont:
        addGlyphs(text, x)
        return path

    if isBytes(text):
        try:
            text = text.decode('utf8')
        except UnicodeDecodeError as e:
            i, j = e.args[2:4]
            # Re-raise with up to ten bytes either side of the bad span
            # appended to the reason.
            reason = '%s\n%s-->%s<--%s' % (
                e.args[4], text[max(i - 10, 0):i], text[i:j], text[j:j + 10])
            raise UnicodeDecodeError(*(e.args[:4] + (reason,)))

    activeFont = font
    runs = unicode2T1(text, [font] + font.substitutionFonts)
    lastIndex = len(runs) - 1
    for index, (runFont, runText) in enumerate(runs):
        if runFont != activeFont:
            setFont(gs, runFont.fontName, fontSize)
            activeFont = runFont
        addGlyphs(runText, x)
        if index != lastIndex:
            # Advance past this run so the next one starts where it ended.
            x += runFont.stringWidth(runText.decode(runFont.encName), fontSize)
    return path
def _formatText(self, text):
    """Return the PDF operators that show *text*, joined by single spaces.

    Three cases are handled, chosen from the current font:

    * dynamic fonts: the text is split by ``font.splitString`` and a font
      selection operator is emitted whenever the subset changes;
    * multi byte fonts: one font selection followed by the text as
      formatted by ``font.formatForPdf``;
    * everything else: the text is converted with ``pdfmetrics.unicode2T1``
      using the font and its substitution fonts, switching fonts between
      runs and restoring the original font at the end if it was changed.

    When ``log2vis`` is available and the direction is 'LTR' or 'RTL' the
    text is first passed through it.
    """
    if log2vis and self.direction in ('LTR', 'RTL'):
        # Use pyfribidi to write the text in the correct visual order.
        baseDirection = directionsMap.get(self.direction, DIR_ON)
        text = log2vis(text, baseDirection, clean=True)

    canvas = self._canvas
    font = pdfmetrics.getFont(self._fontname)
    ops = []

    def selectFont(pdfName):
        # Operator selecting pdfName at the current size and leading.
        return "%s %s Tf %s TL" % (pdfName, fp_str(self._fontsize), fp_str(self._leading))

    if font._dynamicFont:
        for subset, chunk in font.splitString(text, canvas._doc):
            if subset != self._curSubset:
                ops.append(selectFont(font.getSubsetInternalName(subset, canvas._doc)))
                self._curSubset = subset
            ops.append("(%s) Tj" % canvas._escape(chunk))
        return ' '.join(ops)

    if font._multiByte:
        ops.append(selectFont(canvas._doc.getInternalFontName(font.fontName)))
        ops.append("(%s) Tj" % font.formatForPdf(text))
        return ' '.join(ops)

    # Remaining fonts: convert to T1 coding.
    if isBytes(text):
        try:
            text = text.decode('utf8')
        except UnicodeDecodeError as e:
            i, j = e.args[2:4]
            # Re-raise with up to ten bytes either side of the bad span
            # appended to the reason.
            reason = '%s\n%s-->%s<--%s' % (
                e.args[4], text[max(i - 10, 0):i], text[i:j], text[j:j + 10])
            raise UnicodeDecodeError(*(e.args[:4] + (reason,)))

    activeFont = font
    for runFont, runText in pdfmetrics.unicode2T1(text, [font] + font.substitutionFonts):
        if runFont != activeFont:
            ops.append(selectFont(canvas._doc.getInternalFontName(runFont.fontName)))
            activeFont = runFont
        ops.append("(%s) Tj" % canvas._escape(runText))
    if font != activeFont:
        # A substitution font is still selected; switch back.
        ops.append(selectFont(canvas._doc.getInternalFontName(self._fontname)))
    return ' '.join(ops)
def __init__(self, fileNameOrContent):
    """Set up empty state, then load from PDF bytes or from a file name.

    fileNameOrContent is treated as literal PDF content when it is bytes
    and the marker b"%PDF" occurs within its first 20 bytes; it is then
    handed to parseContent.  Anything else is passed to open as a file
    name.
    """
    self.cleanText = {}
    self._annotations = None
    self.context = None
    self.rawContent = None
    # Answers from parsing referenced streams are kept here so the work
    # does not have to be repeated.
    self._xobjectStreams = {}

    looksLikePdf = isBytes(fileNameOrContent) and b"%PDF" in fileNameOrContent[0:20]
    if not looksLikePdf:
        self.open(fileNameOrContent)
    else:
        self.parseContent(fileNameOrContent)
def _formatText(self, text):
    """Generate the PDF text output operator(s) for *text*.

    The operators are collected in a list and returned joined by single
    spaces.  Dynamic fonts are emitted subset by subset, multi byte fonts
    use ``font.formatForPdf``, and all other fonts go through
    ``pdfmetrics.unicode2T1`` with the font's substitution fonts, with the
    original font re-selected at the end if a substitution was left active.

    When ``log2vis`` is available and the direction is 'LTR' or 'RTL' the
    text is first passed through it.
    """
    if log2vis and self.direction in ('LTR','RTL'):
        # Use pyfribidi to write the text in the correct visual order.
        visualDir = directionsMap.get(self.direction.upper(),DIR_ON)
        text = log2vis(text, visualDir, clean=True)

    canv = self._canvas
    font = pdfmetrics.getFont(self._fontname)
    out = []
    emit = out.append

    def tfTL(pdfFontName):
        # Font selection operator at the current size and leading.
        return "%s %s Tf %s TL" % (pdfFontName, fp_str(self._fontsize), fp_str(self._leading))

    if font._dynamicFont:
        for subset, piece in font.splitString(text, canv._doc):
            if subset!=self._curSubset:
                emit(tfTL(font.getSubsetInternalName(subset, canv._doc)))
                self._curSubset = subset
            emit("(%s) Tj" % canv._escape(piece))
    elif font._multiByte:
        emit(tfTL(canv._doc.getInternalFontName(font.fontName)))
        emit("(%s) Tj" % font.formatForPdf(text))
    else:
        # convert to T1 coding
        if isBytes(text):
            try:
                text = text.decode('utf8')
            except UnicodeDecodeError as e:
                i,j = e.args[2:4]
                # Add up to ten bytes of context either side of the bad span.
                detail = '%s\n%s-->%s<--%s' % (e.args[4],text[max(i-10,0):i],text[i:j],text[j:j+10])
                raise UnicodeDecodeError(*(e.args[:4]+(detail,)))
        selected = font
        for f, t in pdfmetrics.unicode2T1(text,[font]+font.substitutionFonts):
            if f!=selected:
                emit(tfTL(canv._doc.getInternalFontName(f.fontName)))
                selected = f
            emit("(%s) Tj" % canv._escape(t))
        if font!=selected:
            emit(tfTL(canv._doc.getInternalFontName(self._fontname)))
    return ' '.join(out)
def tt2xml(tt):
    '''Convert tuple tree form to unicode xml.

    Accepted inputs:
      None   -> the empty string
      bytes  -> decoded as utf8 and then handled as unicode
      str    -> returned escaped
      list   -> the concatenation of each item converted in turn
      tuple  -> an element; item 0 is the tag name, item 1 the attribute
                mapping (may be empty/None) and item 2 the content, where
                None produces a self closing tag

    Tag names, attribute names and attribute values may each be bytes or
    unicode; only bytes values are decoded (as utf8).

    Raises ValueError for any other kind of value.
    '''
    if tt is None:
        return ''
    if isBytes(tt):
        return tt2xml(tt.decode('utf8'))
    if isUnicode(tt):
        return escape(tt)
    if isinstance(tt,list):
        return ''.join(tt2xml(x) for x in tt)
    if isinstance(tt,tuple):
        def _u(v):
            # Decode only real bytes: str has no decode() on Python 3.
            return v.decode('utf8') if isBytes(v) else v
        tag = _u(tt[0])
        parts = ['<'+tag]
        attrs = tt[1]
        content = tt[2]
        if attrs:
            for k,v in attrs.items():
                parts.append(' %s=%s' % (_u(k),quoteattr(_u(v))))
        if content is not None:
            parts.append('>')
            parts.append(tt2xml(content))
            parts.append('</'+tag+'>')
        else:
            parts.append('/>')
        return ''.join(parts)
    raise ValueError('Invalid value %r passed to tt2xml' % tt)
def normalize(self, x):
    """Return *x* converted to an int; bytes are decoded as UTF-8 first."""
    if isBytes(x):
        x = x.decode('utf8')
    return int(x)
def _re_match(pat,text,flags=re.M|re.I):
    """Run re.match of *pat* against *text* with *flags*.

    When running on Python 3 and *text* is bytes, the pattern is encoded
    as latin1 first so that pattern and text are of the same kind.
    """
    bytesText = isPy3 and isBytes(text)
    pattern = pat.encode('latin1') if bytesText else pat
    return re.match(pattern,text,flags)
def preProcess(tree, nameSpace, caller=None):
    """Expands the parsed tree in the namespace and return new one.

    tree is a 4-tuple (tagName, attrs, children, extraStuff).  Attribute
    values starting with '$' are replaced by the result of evaluating the
    rest of the value in nameSpace; the original attribute dict is never
    modified.

    Returns a single tag-tuple in most cases, but a list of them if
    processing a loop node.  Assign, script and acquire nodes return None
    after updating nameSpace; expr nodes return a one item list holding
    the text; if and switch nodes return the processed children of the
    selected branch (or None when nothing is selected).

    caller is only consulted for doclet nodes, where the value 'rml'
    makes the import locations come from rml2pdf.

    Raises ValueError when a switch node contains a tag other than case
    or default, and SyntaxError when a script node fails to compile.

    NOTE(review): attribute values and node text are passed to eval /
    rl_exec, so documents processed here must come from a trusted source.
    """
    from reportPackages.rlextra.radxml import xmlutils

    tagName, attrs, children, extraStuff = tree

    #any attribute as $name becomes the value of name.
    #tags might be nested in a loop, and if so then each dictionary
    #must be a fresh copy rather than a pointer to the same dict
    newAttrs = attrs.copy() if attrs is not None else {}
    for key, value in list(newAttrs.items()):
        if isinstance(value,str) and value[0:1] == '$':
            newValue = eval(value[1:], nameSpace)
            newAttrs[key] = newValue
    attrs = newAttrs

    if tagName in TAG_LOOP:
        innerTxt = attrs[TAG_LOOP_INNER]
        outer = eval(attrs[TAG_LOOP_OUTER], nameSpace)
        dataSet = []
        for row in outer:
            #bind the loop target(s) by executing an assignment statement
            nameSpace['__loop_inner__'] = row
            rl_exec((innerTxt + " = __loop_inner__\n"), nameSpace)
            #at this point we're making lots of child nodes.
            newChildren = processChildren(children, nameSpace)
            if newChildren is not None:
                dataSet.extend(newChildren)
        return dataSet

    elif tagName in TAG_ASSIGN:
        name = attrs[TAG_ASSIGN_NAME]
        valueStr = attrs[TAG_ASSIGN_VALUE]
        try:
            value = eval(valueStr, nameSpace)
        except SyntaxError:  #must be a string
            value = valueStr
        nameSpace[name] = value
        return None

    elif tagName in TAG_SCRIPT:
        code = children[0]
        if not code.endswith('\n'):
            code += '\n'
        try:
            rl_exec(code, nameSpace)
        except SyntaxError:
            raise SyntaxError("Error with following script in xpreppy:\n\n%s" % code)
        return None

    elif tagName in TAG_EXPR:
        exprText = children[0]
        assert isinstance(exprText,strTypes), "expr can only contain strings"

        #attributes may affect escaping
        escape = attrs.get(u'escape', None)
        encoding = attrs.get(u'encoding',u'utf8')

        exprValue = eval(exprText, nameSpace)
        if isBytes(exprValue):
            exprValue = exprValue.decode(encoding)
        elif isUnicode(exprValue):
            pass
        else:
            exprValue = asUnicodeEx(exprValue)

        if escape in (u'CDATA',u'CDATAESCAPE'):
            exprValue = u'<![CDATA[%s]]>' % exprValue
            if escape==u'CDATA':
                return [exprValue]
            #CDATAESCAPE falls through to the xmlEscape below
        elif escape == u'off':
            return [asUnicodeEx(exprValue)]
        elif escape == u'unescape':
            return [xmlutils.unescape(exprValue, ENTITY_SUBSTITUTIONS_DRAWSTRING_DICT)]
        return [xmlEscape(exprValue)]

    elif tagName in TAG_IF:
        condText = attrs[u'cond']
        yesOrNo = eval(condText, nameSpace)
        if yesOrNo:
            return processChildren(children, nameSpace)
        #a false condition yields None

    elif tagName in TAG_SWITCH:
        #two modes, with and without top level variable
        exprText = attrs.get(u'expr',u'')
        if exprText:
            expr = eval(exprText, nameSpace)

        selected = None
        for child in children:
            if isinstance(child,tuple):
                (childTagName, childAttrs, grandChildren, stuff) = child
                if childTagName in TAG_CASE:
                    condition = childAttrs[u'condition']
                    if exprText:
                        #check if it equals the value
                        try:
                            value = eval(condition, nameSpace)
                        except NameError:
                            value = condition  # assume a string
                        if (expr == value):
                            selected = processChildren(grandChildren, nameSpace)
                            break
                    else:
                        #they gave us a full condition, evaluate it
                        yes = eval(condition, nameSpace)
                        if yes:
                            selected = processChildren(grandChildren, nameSpace)
                            break
                elif childTagName in TAG_DEFAULT:
                    selected = processChildren(grandChildren, nameSpace)
                    break
                else:
                    #two values need two placeholders; with only one the
                    #formatting itself failed with a TypeError
                    raise ValueError('%s tag may only contain these tags: %s' % (tagName, ', '.join(TAG_CASE+TAG_DEFAULT)))
        return selected

    elif tagName in TAG_ACQUIRE:
        #all children will be data fetchers
        xacquire.acquireData(children, nameSpace)
        return None

    elif tagName in TAG_DOCLET:
        #pull out args needed to initialize
        dirName = attrs.get(u"baseDir", None)
        moduleName = attrs[u"module"]
        className = attrs[u"class"]
        dataStr = attrs.get(u"data", None)

        #load module, import and create it
        if caller == 'rml':
            from reportPackages.rlextra.rml2pdf.rml2pdf import _rml2pdf_locations
            locations = _rml2pdf_locations(dirName)
        else:
            locations = dirName
        m = recursiveImport(moduleName, locations)
        klass = getattr(m, className)
        docletObj = klass()

        #give it the data model
        if dataStr:
            dataObj = eval(dataStr, nameSpace)
        else:
            dataObj = nameSpace
        docletObj.setData(dataObj)

        #hide it in the tree so RML can see the object
        attrs[u'__doclet__'] = docletObj

        #return the tag otherwise unmodified
        return (tagName, attrs, children, extraStuff)

    else:
        newChildren = processChildren(children, nameSpace)
        return (tagName, attrs, newChildren, extraStuff)
def normalize(self,x):
    """Return the int value of *x*, decoding it as UTF-8 when it is bytes."""
    value = x
    if isBytes(value):
        value = value.decode('utf8')
    return int(value)
def _re_match(pat,text,flags=re.M|re.I):
    """Run re.match of *pat* against *text* with *flags*.

    When *text* is bytes the pattern is encoded as latin1 first so that
    pattern and text are of the same kind.
    """
    pattern = pat.encode('latin1') if isBytes(text) else pat
    return re.match(pattern,text,flags)
def preProcess(tree, nameSpace, caller=None):
    """Expands the parsed tree in the namespace and return new one.

    tree is a 4-tuple (tagName, attrs, children, extraStuff).  Attribute
    values starting with '$' are replaced by the result of evaluating the
    rest of the value in nameSpace; the original attribute dict is never
    modified.

    Returns a single tag-tuple in most cases, but a list of them if
    processing a loop node.  Assign, script and acquire nodes return None
    after updating nameSpace; expr nodes return a one item list holding
    the text; if and switch nodes return the processed children of the
    selected branch (or None when nothing is selected).

    caller is only consulted for doclet nodes, where the value 'rml'
    makes the import locations come from rml2pdf.

    Raises ValueError when a switch node contains a tag other than case
    or default, and SyntaxError when a script node fails to compile.

    NOTE(review): attribute values and node text are passed to eval /
    rl_exec, so documents processed here must come from a trusted source.
    """
    from rlextra.radxml import xmlutils

    tagName, attrs, children, extraStuff = tree

    #any attribute as $name becomes the value of name.
    #tags might be nested in a loop, and if so then each dictionary
    #must be a fresh copy rather than a pointer to the same dict
    newAttrs = attrs.copy() if attrs is not None else {}
    for key, value in list(newAttrs.items()):
        if isinstance(value,str) and value[0:1] == '$':
            newValue = eval(value[1:], nameSpace)
            newAttrs[key] = newValue
    attrs = newAttrs

    if tagName in TAG_LOOP:
        innerTxt = attrs[TAG_LOOP_INNER]
        outer = eval(attrs[TAG_LOOP_OUTER], nameSpace)
        dataSet = []
        for row in outer:
            #bind the loop target(s) by executing an assignment statement
            nameSpace['__loop_inner__'] = row
            rl_exec((innerTxt + " = __loop_inner__\n"), nameSpace)
            #at this point we're making lots of child nodes.
            newChildren = processChildren(children, nameSpace)
            if newChildren is not None:
                dataSet.extend(newChildren)
        return dataSet

    elif tagName in TAG_ASSIGN:
        name = attrs[TAG_ASSIGN_NAME]
        valueStr = attrs[TAG_ASSIGN_VALUE]
        try:
            #evaluate in the template namespace, like every other
            #expression here, so values can refer to template variables
            value = eval(valueStr, nameSpace)
        except SyntaxError:  #must be a string
            value = valueStr
        nameSpace[name] = value
        return None

    elif tagName in TAG_SCRIPT:
        code = children[0]
        if not code.endswith('\n'):
            code += '\n'
        try:
            rl_exec(code, nameSpace)
        except SyntaxError:
            raise SyntaxError("Error with following script in xpreppy:\n\n%s" % code)
        return None

    elif tagName in TAG_EXPR:
        exprText = children[0]
        assert isinstance(exprText,strTypes), "expr can only contain strings"

        #attributes may affect escaping
        escape = attrs.get(u'escape', None)
        encoding = attrs.get(u'encoding',u'utf8')

        exprValue = eval(exprText, nameSpace)
        if isBytes(exprValue):
            exprValue = exprValue.decode(encoding)
        elif isUnicode(exprValue):
            pass
        else:
            exprValue = asUnicodeEx(exprValue)

        if escape in (u'CDATA',u'CDATAESCAPE'):
            exprValue = u'<![CDATA[%s]]>' % exprValue
            if escape==u'CDATA':
                return [exprValue]
            #CDATAESCAPE falls through to the xmlEscape below
        elif escape == u'off':
            return [asUnicodeEx(exprValue)]
        elif escape == u'unescape':
            return [xmlutils.unescape(exprValue, ENTITY_SUBSTITUTIONS_DRAWSTRING_DICT)]
        return [xmlEscape(exprValue)]

    elif tagName in TAG_IF:
        condText = attrs[u'cond']
        yesOrNo = eval(condText, nameSpace)
        if yesOrNo:
            return processChildren(children, nameSpace)
        #a false condition yields None

    elif tagName in TAG_SWITCH:
        #two modes, with and without top level variable
        exprText = attrs.get(u'expr',u'')
        if exprText:
            expr = eval(exprText, nameSpace)

        selected = None
        for child in children:
            if isinstance(child,tuple):
                (childTagName, childAttrs, grandChildren, stuff) = child
                if childTagName in TAG_CASE:
                    condition = childAttrs[u'condition']
                    if exprText:
                        #check if it equals the value
                        try:
                            value = eval(condition, nameSpace)
                        except NameError:
                            value = condition  # assume a string
                        if (expr == value):
                            selected = processChildren(grandChildren, nameSpace)
                            break
                    else:
                        #they gave us a full condition, evaluate it
                        yes = eval(condition, nameSpace)
                        if yes:
                            selected = processChildren(grandChildren, nameSpace)
                            break
                elif childTagName in TAG_DEFAULT:
                    selected = processChildren(grandChildren, nameSpace)
                    break
                else:
                    #two values need two placeholders; with only one the
                    #formatting itself failed with a TypeError
                    raise ValueError('%s tag may only contain these tags: %s' % (tagName, ', '.join(TAG_CASE+TAG_DEFAULT)))
        return selected

    elif tagName in TAG_ACQUIRE:
        #all children will be data fetchers
        xacquire.acquireData(children, nameSpace)
        return None

    elif tagName in TAG_DOCLET:
        #pull out args needed to initialize
        dirName = attrs.get(u"baseDir", None)
        moduleName = attrs[u"module"]
        className = attrs[u"class"]
        dataStr = attrs.get(u"data", None)

        #load module, import and create it
        if caller == 'rml':
            from rlextra.rml2pdf.rml2pdf import _rml2pdf_locations
            locations = _rml2pdf_locations(dirName)
        else:
            locations = dirName
        m = recursiveImport(moduleName, locations)
        klass = getattr(m, className)
        docletObj = klass()

        #give it the data model
        if dataStr:
            dataObj = eval(dataStr, nameSpace)
        else:
            dataObj = nameSpace
        docletObj.setData(dataObj)

        #hide it in the tree so RML can see the object
        attrs[u'__doclet__'] = docletObj

        #return the tag otherwise unmodified
        return (tagName, attrs, children, extraStuff)

    else:
        newChildren = processChildren(children, nameSpace)
        return (tagName, attrs, newChildren, extraStuff)