예제 #1
0
 def formatForPdf(self, text):
     """Return ``text`` as escaped big-endian UTF-16 (no BOM) for the PDF.

     ``text`` may be a unicode object or a UTF-8 encoded byte string; byte
     strings are decoded as UTF-8 first.  The resulting UTF-16BE bytes are
     passed through ``_escape`` and that result is returned.
     """
     # isinstance rather than an exact type() comparison, so that unicode
     # subclasses are not mistaken for byte strings and pushed through
     # .decode('utf8').
     if not isinstance(text, unicode):
         text = text.decode('utf8')
     # These strings should be encoded as UTF16 minus the BOM; the explicit
     # big-endian codec emits no byte-order mark.
     return _escape(text.encode('utf_16_be'))
예제 #2
0
파일: cidfonts.py 프로젝트: Guillon88/stdm
 def formatForPdf(self, text):
     """Return ``text`` as escaped big-endian UTF-16 (no BOM) for the PDF.

     ``text`` may be a unicode object or a UTF-8 encoded byte string; byte
     strings are decoded as UTF-8 first.  The resulting UTF-16BE bytes are
     passed through ``_escape`` and that result is returned.
     """
     # isinstance rather than an exact type() comparison, so that unicode
     # subclasses are not mistaken for byte strings and pushed through
     # .decode('utf8').
     if not isinstance(text, unicode):
         text = text.decode('utf8')
     # These strings should be encoded as UTF16 minus the BOM; the explicit
     # big-endian codec emits no byte-order mark.
     return _escape(text.encode('utf_16_be'))
예제 #3
0
 def formatForPdf(self, text):
     """Return ``text`` after passing it through ``_escape``.

     No re-encoding is done here; the text is escaped exactly as given.
     """
     return _escape(text)
예제 #4
0
파일: cidfonts.py 프로젝트: Guillon88/stdm
 def formatForPdf(self, text):
     """Return ``text`` after passing it through ``_escape``.

     No re-encoding is done here; the text is escaped exactly as given.
     """
     return _escape(text)
예제 #5
0
from reportlab.pdfbase import pdfutils

from reportlab.platypus.paragraph import Paragraph
from reportlab.lib.styles import ParagraphStyle
from reportlab.graphics.shapes import Drawing, String, Ellipse
import re
import codecs
# Matches a '(' followed by any run of characters other than '(' and a
# closing ')' -- the way a literal string operand such as "(Hello World)"
# appears in PDF operators.  The run is greedy, so it extends to the last
# ')' found before the next '('.
textPat = re.compile(r'\([^(]*\)')

# Test sentences.
# The four substituted bytes are the cp1252 codes for the copyright sign
# (169), trademark sign (153), registered sign (174) and e-acute (0xe9).
testCp1252 = 'copyright %s trademark %s registered %s ReportLab! Ol%s!' % (
    chr(169), chr(153), chr(174), chr(0xe9))
# The same sentence decoded from cp1252 to unicode, then re-encoded as UTF-8.
testUni = unicode(testCp1252, 'cp1252')
testUTF8 = testUni.encode('utf-8')
# expected result is octal-escaped text in the PDF
expectedCp1252 = pdfutils._escape(testCp1252)


def extractText(pdfOps):
    """Utility to rip out the PDF text within a block of PDF operators.

    PDF will show a string draw as something like "(Hello World) Tj",
    i.e. text is in curved brackets.  Crude and dirty: matching is done
    with the module-level ``textPat`` pattern and probably fails on
    escaped brackets.

    :param pdfOps: string of PDF operators to scan.
    :returns: list of the matched strings with the enclosing '(' and ')'
        removed, in order of appearance (empty list if nothing matches).
    """
    # Every match still carries its surrounding brackets; slice them off.
    # A list comprehension always yields a real list, whereas map() only
    # does so on Python 2.
    return [match[1:-1] for match in textPat.findall(pdfOps)]


def subsetToUnicode(ttf, subsetCodeStr):
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfutils
from reportlab.platypus.paragraph import Paragraph
from reportlab.lib.styles import ParagraphStyle
from reportlab.graphics.shapes import Drawing, String, Ellipse
import re
import codecs
# Matches a '(' followed by any run of characters other than '(' and a
# closing ')' -- the way a literal string operand such as "(Hello World)"
# appears in PDF operators.  The run is greedy, so it extends to the last
# ')' found before the next '('.
textPat = re.compile(r'\([^(]*\)')

# Test sentences.
# The four substituted bytes are the cp1252 codes for the copyright sign
# (169), trademark sign (153), registered sign (174) and e-acute (0xe9).
testCp1252 = 'copyright %s trademark %s registered %s ReportLab! Ol%s!' % (chr(169), chr(153),chr(174), chr(0xe9))
# The same sentence decoded from cp1252 to unicode, then re-encoded as UTF-8.
testUni = unicode(testCp1252, 'cp1252')
testUTF8 = testUni.encode('utf-8')
# expected result is octal-escaped text in the PDF
expectedCp1252 = pdfutils._escape(testCp1252)

def extractText(pdfOps):
    """Utility to rip out the PDF text within a block of PDF operators.

    PDF will show a string draw as something like "(Hello World) Tj",
    i.e. text is in curved brackets.  Crude and dirty: matching is done
    with the module-level ``textPat`` pattern and probably fails on
    escaped brackets.

    :param pdfOps: string of PDF operators to scan.
    :returns: list of the matched strings with the enclosing '(' and ')'
        removed, in order of appearance (empty list if nothing matches).
    """
    # Every match still carries its surrounding brackets; slice them off.
    # A list comprehension always yields a real list, whereas map() only
    # does so on Python 2.
    return [match[1:-1] for match in textPat.findall(pdfOps)]

def subsetToUnicode(ttf, subsetCodeStr):
    """Return unicode string represented by given subsetCode string
    as found when TrueType font rendered to PDF, ttf must be the font