def formatForPdf(self, text):
    """Return *text* encoded as UTF-16 and escaped with ``_escape``.

    These strings should be encoded as UTF-16 minus the BOM, so the text is
    run through the big-endian UTF-16 encoder and the resulting bytes are
    then escaped.

    text -- a unicode object, or a byte string holding UTF-8 data, which is
            decoded to unicode first.
    """
    from codecs import utf_16_be_encode

    # isinstance rather than an exact type test: a unicode subclass is
    # already text and must not be pushed through a UTF-8 decode.
    if not isinstance(text, unicode):
        text = text.decode('utf8')
    # utf_16_be_encode returns a (encoded, length) pair; only the encoded
    # data is wanted.
    utfText = utf_16_be_encode(text)[0]
    return _escape(utfText)
def formatForPdf(self, text):
    """Return *text* after passing it through ``_escape``."""
    return _escape(text)
from reportlab.pdfbase import pdfutils from reportlab.platypus.paragraph import Paragraph from reportlab.lib.styles import ParagraphStyle from reportlab.graphics.shapes import Drawing, String, Ellipse import re import codecs textPat = re.compile(r'\([^(]*\)') #test sentences testCp1252 = 'copyright %s trademark %s registered %s ReportLab! Ol%s!' % ( chr(169), chr(153), chr(174), chr(0xe9)) testUni = unicode(testCp1252, 'cp1252') testUTF8 = testUni.encode('utf-8') # expected result is octal-escaped text in the PDF expectedCp1252 = pdfutils._escape(testCp1252) def extractText(pdfOps): """Utility to rip out the PDF text within a block of PDF operators. PDF will show a string draw as something like "(Hello World) Tj" i.e. text is in curved brackets. Crude and dirty, probably fails on escaped brackets. """ found = textPat.findall(pdfOps) #chop off '(' and ')' return map(lambda x: x[1:-1], found) def subsetToUnicode(ttf, subsetCodeStr):
from reportlab.pdfbase import pdfmetrics from reportlab.pdfbase.ttfonts import TTFont from reportlab.pdfbase import pdfutils from reportlab.platypus.paragraph import Paragraph from reportlab.lib.styles import ParagraphStyle from reportlab.graphics.shapes import Drawing, String, Ellipse import re import codecs textPat = re.compile(r'\([^(]*\)') #test sentences testCp1252 = 'copyright %s trademark %s registered %s ReportLab! Ol%s!' % (chr(169), chr(153),chr(174), chr(0xe9)) testUni = unicode(testCp1252, 'cp1252') testUTF8 = testUni.encode('utf-8') # expected result is octal-escaped text in the PDF expectedCp1252 = pdfutils._escape(testCp1252) def extractText(pdfOps): """Utility to rip out the PDF text within a block of PDF operators. PDF will show a string draw as something like "(Hello World) Tj" i.e. text is in curved brackets. Crude and dirty, probably fails on escaped brackets. """ found = textPat.findall(pdfOps) #chop off '(' and ')' return map(lambda x:x[1:-1], found) def subsetToUnicode(ttf, subsetCodeStr): """Return unicode string represented by given subsetCode string as found when TrueType font rendered to PDF, ttf must be the font