Example #1
0
class PDFTextDevice(PDFDevice):
    def handle_undefined_char(self, cidcoding, cid):
        """Return the placeholder used for a character with no Unicode mapping.

        When ``self.debug`` is truthy, the offending ``(cidcoding, cid)`` pair
        is reported on standard error before the placeholder is returned.

        Returns:
            The literal string ``'?'``.
        """
        if self.debug:
            # sys.stderr.write produces the same output on Python 2 and 3,
            # whereas the ``print >> stream`` statement is Python-2-only and
            # raises TypeError when executed on Python 3.
            sys.stderr.write('undefined: %r, %r\n' % (cidcoding, cid))
        return '?'

    def render_chars(self, matrix, font, fontsize, charspace, scaling, chars):
        """Default hook for drawing a run of characters: draws nothing.

        Callers in this class unpack the result as ``(dx, dy)`` and add it to
        the current position, so this base version reports a zero advance.
        NOTE(review): presumably overridden by concrete devices - confirm.
        """
        zero_advance = (0, 0)
        return zero_advance

    def render_string(self, textstate, seq):
        """Render ``seq`` with the current text state and update the line position.

        The text matrix is combined with ``self.ctm`` through ``mult_matrix``;
        character and word spacing are multiplied by ``scaling * .01``, and
        word spacing is forced to 0 for multibyte fonts.  The vertical or
        horizontal renderer is picked from ``font.is_vertical()`` and its
        return value is stored back into ``textstate.linematrix``.
        """
        device_matrix = mult_matrix(textstate.matrix, self.ctm)
        font = textstate.font
        fontsize = textstate.fontsize
        hscale = textstate.scaling * .01
        charspace = textstate.charspace * hscale
        wordspace = textstate.wordspace * hscale
        if font.is_multibyte():
            # Word spacing is not applied to multibyte fonts.
            wordspace = 0
        # Factor applied to the numeric entries of seq by the renderers.
        dxscale = .001 * fontsize * hscale
        if font.is_vertical():
            render = self.render_string_vertical
        else:
            render = self.render_string_horizontal
        textstate.linematrix = render(
            seq, device_matrix, textstate.linematrix, font, fontsize, hscale,
            charspace, wordspace, dxscale)

    def render_string_horizontal(self, seq, matrix, (x, y), font, fontsize,
                                 scaling, charspace, wordspace, dxscale):
        """Walk ``seq`` left to right, handing runs of characters to render_chars.

        Each item of ``seq`` is either a number (int/float) or something that
        ``font.decode`` turns into character ids.  Decoded characters are
        collected in ``chars`` as ``(char, cid)`` pairs and flushed through
        ``self.render_chars`` whenever a number is met or a cid 32 is met
        while ``wordspace`` is non-zero.

        NOTE(review): in the lines shown here the method never flushes the
        characters still pending after the loop and falls off the end without
        returning ``(x, y)``, although render_string stores the return value
        in ``textstate.linematrix``.  This excerpt looks truncated - confirm
        against the complete file.
        """
        chars = []
        # True once a run has been flushed on a cid 32; makes the next flush
        # add one charspace first.
        needspace = False
        for obj in seq:
            if isinstance(obj, int) or isinstance(obj, float):
                # Numeric item: flush the pending run, then shift x back by
                # obj * dxscale.
                (dx, dy) = self.render_chars(translate_matrix(matrix, (x, y)),
                                             font, fontsize, charspace,
                                             scaling, chars)
                x += dx - obj * dxscale
                y += dy
                chars = []
                needspace = False
            else:
                for cid in font.decode(obj):
                    try:
                        char = font.to_unichr(cid)
                    except PDFUnicodeNotDefined, e:
                        # No Unicode mapping: take the details from the
                        # exception and substitute the placeholder character.
                        (cidcoding, cid) = e.args
                        char = self.handle_undefined_char(cidcoding, cid)
                    chars.append((char, cid))
                    if cid == 32 and wordspace:
                        # cid 32 (presumably the space character) ends the
                        # run when word spacing is in effect.
                        if needspace:
                            x += charspace
                        (dx, dy) = self.render_chars(
                            translate_matrix(matrix, (x, y)), font, fontsize,
                            charspace, scaling, chars)
                        needspace = True
                        x += dx + wordspace
                        y += dy
                        chars = []
Example #2
0
 def render_string(self, textstate, textmatrix, seq):
     """Turn ``seq`` into TextItem objects added to ``self.cur_item``.

     ``textmatrix`` is first combined with ``self.ctm``.  Numeric items of
     ``seq`` are recorded as ``(None, None, number)``; every other item is
     decoded with ``font.decode`` and recorded per character id as
     ``(char, cid, font.char_disp(cid))``.

     NOTE(review): the lines shown stop inside the loop - entries still in
     ``text`` when ``seq`` is exhausted are not emitted here.  The excerpt
     appears truncated; confirm against the complete file.
     """
     font = textstate.font
     text = []
     textmatrix = mult_matrix(textmatrix, self.ctm)
     for x in seq:
         if isinstance(x, int) or isinstance(x, float):
             # Numeric entry: kept in the run with no character attached.
             text.append((None, None, x))
         else:
             chars = font.decode(x)
             for cid in chars:
                 try:
                     char = font.to_unicode(cid)
                 except PDFUnicodeNotDefined, e:
                     # No Unicode mapping: use the placeholder character.
                     (cidcoding, cid) = e.args
                     char = self.handle_undefined_char(cidcoding, cid)
                 text.append((char, cid, font.char_disp(cid)))
                 # cid 32 in a non-multibyte font closes the current run.
                 if cid == 32 and not font.is_multibyte():
                     # An entry was appended just above, so this check is
                     # always true at this point.
                     if text:
                         item = TextItem(textmatrix, font,
                                         textstate.fontsize,
                                         textstate.charspace,
                                         textstate.scaling, text)
                         self.cur_item.add(item)
                         # Move the matrix by the item's advance plus the
                         # scaled word spacing before starting the next run.
                         (dx, dy) = item.adv
                         dx += textstate.wordspace * textstate.scaling * .01
                         textmatrix = translate_matrix(textmatrix, (dx, dy))
                         text = []
Example #3
0
 def render_string_horizontal(self, seq, matrix, pos,
                              font, fontsize, scaling, charspace, wordspace, rise, dxscale):
     """Render ``seq`` along the x axis and return the final ``(x, y)``.

     Numeric items move x back by ``item * dxscale``.  Every other item is
     decoded with ``font.decode`` and each cid is passed to
     ``self.render_char``, whose return value is added to x.  ``charspace``
     is added before every cid except the first thing in ``seq``, and
     ``wordspace`` is added after a cid 32 when it is non-zero.
     """
     x, y = pos
     pending_charspace = False
     for item in seq:
         if isnumber(item):
             x -= item*dxscale
             pending_charspace = True
             continue
         for cid in font.decode(item):
             if pending_charspace:
                 x += charspace
             glyph_matrix = translate_matrix(matrix, (x, y))
             x += self.render_char(glyph_matrix, font, fontsize,
                                   scaling, rise, cid)
             if cid == 32 and wordspace:
                 x += wordspace
             pending_charspace = True
     return (x, y)
Example #4
0
class PDFTextDevice(PDFDevice):
    def handle_undefined_char(self, cidcoding, cid):
        """Supply a stand-in for a character id that has no Unicode mapping.

        Args:
            cidcoding: first value reported for the undefined character.
            cid: the character id that could not be mapped.

        Returns:
            Always the string ``'?'``.  If ``self.debug`` is truthy, a
            diagnostic line is written to standard error first.
        """
        if self.debug:
            # Written with sys.stderr.write rather than the Python-2-only
            # ``print >> sys.stderr`` statement, which fails with TypeError
            # on Python 3; the emitted text is unchanged.
            message = 'undefined: %r, %r' % (cidcoding, cid)
            sys.stderr.write(message + '\n')
        return '?'

    def render_string(self, textstate, seq):
        """Render ``seq`` using ``textstate`` and record where the line ends.

        Spacing values are multiplied by ``textstate.scaling * .01``; word
        spacing is dropped (set to 0) for multibyte fonts.  Depending on
        ``font.is_vertical()`` the work is delegated to
        ``render_string_vertical`` or ``render_string_horizontal``, and the
        position that renderer returns replaces ``textstate.linematrix``.
        """
        device_matrix = mult_matrix(textstate.matrix, self.ctm)
        font = textstate.font
        fontsize = textstate.fontsize
        hscale = textstate.scaling * .01
        charspace = textstate.charspace * hscale
        wordspace = textstate.wordspace * hscale
        rise = textstate.rise
        if font.is_multibyte():
            # Word spacing is not applied to multibyte fonts.
            wordspace = 0
        # Factor the renderers apply to numeric entries of seq.
        dxscale = .001 * fontsize * hscale
        render = (self.render_string_vertical if font.is_vertical()
                  else self.render_string_horizontal)
        textstate.linematrix = render(
            seq, device_matrix, textstate.linematrix, font, fontsize, hscale,
            charspace, wordspace, rise, dxscale)

    def render_string_horizontal(self, seq, matrix, pos, font, fontsize,
                                 scaling, charspace, wordspace, rise, dxscale):
        """Render ``seq`` along the x axis and return the final position.

        Args:
            seq: items that are either numbers (int/float) or values accepted
                by ``font.decode``.
            matrix: base matrix; translated by the current ``(x, y)`` for
                each character before it is passed to ``render_char``.
            pos: starting ``(x, y)`` pair.  Taken as one positional argument
                and unpacked in the body, because tuple parameters in the
                signature are Python-2-only syntax (removed by PEP 3113).
            font, fontsize, scaling, rise: forwarded to ``render_char``.
            charspace: added to x before every character except when nothing
                has preceded it in ``seq``.
            wordspace: added to x after a cid 32 when non-zero.
            dxscale: multiplier applied to numeric items.

        Returns:
            The ``(x, y)`` tuple after all items have been processed.
        """
        (x, y) = pos
        needcharspace = False
        for obj in seq:
            if isinstance(obj, (int, float)):
                # Numeric item: move back by obj * dxscale.
                x -= obj * dxscale
                needcharspace = True
            else:
                for cid in font.decode(obj):
                    if needcharspace:
                        x += charspace
                    # render_char returns the amount to advance for this cid.
                    x += self.render_char(translate_matrix(matrix, (x, y)),
                                          font, fontsize, scaling, rise, cid)
                    if cid == 32 and wordspace:
                        x += wordspace
                    needcharspace = True
        return (x, y)
Example #5
0
 def do_TJ(self, seq):
     """Render ``seq`` on the device, then advance ``textstate.linematrix``.

     The advance is computed from the ``str`` items of ``seq`` (measured with
     ``font.string_width``) minus the non-``str`` items times ``.001``,
     multiplied by the font size, plus one ``charspace`` per character.  For
     vertical fonts it is applied to the y component; otherwise to the x
     component, with ``wordspace`` added per ``' '`` for non-multibyte fonts.
     """
     textstate = self.textstate
     textmatrix = translate_matrix(textstate.matrix, textstate.linematrix)
     self.device.render_string(textstate, textmatrix, seq)
     font = textstate.font
     text = ''.join([item for item in seq if isinstance(item, str)])
     glyph_units = font.string_width(text)
     adjust_units = sum([item for item in seq if not isinstance(item, str)]) * .001
     advance = ((glyph_units - adjust_units) * textstate.fontsize
                + len(text) * textstate.charspace)
     (lx, ly) = textstate.linematrix
     if font.is_vertical():
         # Vertical writing: only the y component moves.
         textstate.linematrix = (lx, ly + advance * (textstate.scaling * .01))
         return
     # Horizontal writing: word spacing applies to non-multibyte fonts only.
     if not font.is_multibyte():
         advance += text.count(' ') * textstate.wordspace
     textstate.linematrix = (lx + advance * (textstate.scaling * .01), ly)
     return
Example #6
0
 def do_TJ(self, seq):
     """Show the items of ``seq`` and move the line position past them.

     ``seq`` is first handed to ``self.device.render_string``.  The width of
     the shown text is then derived from the ``str`` items (via
     ``font.string_width``), the non-``str`` items (times ``0.001``), the
     font size and the character spacing, and added to the y component of
     ``textstate.linematrix`` for vertical fonts or to the x component
     otherwise.
     """
     state = self.textstate
     start_matrix = translate_matrix(state.matrix, state.linematrix)
     self.device.render_string(state, start_matrix, seq)
     font = state.font
     shown = "".join(piece for piece in seq if isinstance(piece, str))
     measured = font.string_width(shown)
     adjustment = sum(piece for piece in seq if not isinstance(piece, str)) * 0.001
     width = (measured - adjustment) * state.fontsize + len(shown) * state.charspace
     (line_x, line_y) = state.linematrix
     vertical = font.is_vertical()
     if not vertical and not font.is_multibyte():
         # Word spacing only counts for horizontal, non-multibyte text.
         width += shown.count(" ") * state.wordspace
     shift = width * (state.scaling * 0.01)
     if vertical:
         line_y += shift
     else:
         line_x += shift
     state.linematrix = (line_x, line_y)
Example #7
0
 def render_string(self, textstate, textmatrix, seq):
   """Build TextItem objects from ``seq`` and add them to ``self.cur_item``.

   Numbers in ``seq`` are stored as ``(None, None, number)`` entries; other
   items are decoded with ``font.decode`` and stored per character id as
   ``(char, cid, font.char_disp(cid))``.

   NOTE(review): the visible lines end inside the loop, so entries left in
   ``text`` after the last item are never turned into a TextItem here.  The
   excerpt looks truncated; confirm against the complete file.
   """
   font = textstate.font
   text = []
   # Combine the incoming text matrix with the device's current matrix.
   textmatrix = mult_matrix(textmatrix, self.ctm)
   for x in seq:
     if isinstance(x, int) or isinstance(x, float):
       # Numeric entry: kept in the run with no character attached.
       text.append((None, None, x))
     else:
       chars = font.decode(x)
       for cid in chars:
         try:
           char = font.to_unicode(cid)
         except PDFUnicodeNotDefined, e:
           # No Unicode mapping: substitute the placeholder character.
           (cidcoding, cid) = e.args
           char = self.handle_undefined_char(cidcoding, cid)
         text.append((char, cid, font.char_disp(cid)))
         # cid 32 in a non-multibyte font closes the current run.
         if cid == 32 and not font.is_multibyte():
           # Always true here: an entry was appended just above.
           if text:
             item = TextItem(textmatrix, font, textstate.fontsize, textstate.charspace, textstate.scaling, text)
             self.cur_item.add(item)
             # Shift by the item's advance plus the scaled word spacing.
             (dx,dy) = item.adv
             dx += textstate.wordspace * textstate.scaling * .01
             textmatrix = translate_matrix(textmatrix, (dx, dy))
             text = []
Example #8
0
                        x += wordspace
                    needcharspace = True
        return (x, y)

    def render_string_vertical(self, seq, matrix, pos,
                               font, fontsize, scaling, charspace, wordspace, rise, dxscale):
        """Render ``seq`` along the y axis and return the final position.

        Args:
            seq: items that are either numbers (int/float) or values accepted
                by ``font.decode``.
            matrix: base matrix, translated by the current ``(x, y)`` for
                each character passed to ``render_char``.
            pos: starting ``(x, y)`` pair.  Accepted as a single argument and
                unpacked in the body; tuple parameters in a signature are
                Python-2-only syntax (removed by PEP 3113).
            font, fontsize, scaling, rise: forwarded to ``render_char``.
            charspace: added to y before a character once anything has
                preceded it in ``seq``.
            wordspace: added to y after a cid 32 when non-zero.
            dxscale: multiplier applied to numeric items.

        Returns:
            The ``(x, y)`` tuple after processing; only y is changed.
        """
        (x, y) = pos
        needcharspace = False
        for obj in seq:
            if isinstance(obj, (int, float)):
                # Numeric item: move back along y by obj * dxscale.
                y -= obj * dxscale
                needcharspace = True
            else:
                for cid in font.decode(obj):
                    if needcharspace:
                        y += charspace
                    # render_char returns the amount to advance for this cid.
                    y += self.render_char(translate_matrix(matrix, (x, y)),
                                          font, fontsize, scaling, rise, cid)
                    if cid == 32 and wordspace:
                        y += wordspace
                    needcharspace = True
        return (x, y)

    def render_char(self, matrix, font, fontsize, scaling, rise, cid):
        """Default per-character hook: draws nothing and returns 0.

        ``render_string_vertical`` adds the returned value to its running
        coordinate, so 0 means "no advance".
        NOTE(review): presumably overridden by concrete devices - confirm.
        """
        advance = 0
        return advance


##  TagExtractor
##
class TagExtractor(PDFDevice):
    # PDFDevice subclass.
    # NOTE(review): only the first statement of the constructor is visible in
    # this excerpt; the rest of the class is not shown.
    def __init__(self, rsrcmgr, outfp, codec='utf-8', debug=0):
        # rsrcmgr is handed to the PDFDevice base initializer.  outfp, codec
        # and debug are not used in the lines shown here - presumably stored
        # by the part of the constructor that is cut off; confirm.
        PDFDevice.__init__(self, rsrcmgr)
Example #9
0
                        x += wordspace
                    needcharspace = True
        return (x, y)

    def render_string_vertical(self, seq, matrix, pos, font, fontsize,
                               scaling, charspace, wordspace, rise, dxscale):
        """Lay out ``seq`` top-to-bottom and report where the line ends.

        Numeric items of ``seq`` subtract ``obj * dxscale`` from y.  Every
        other item is decoded with ``font.decode``; each resulting cid is
        passed to ``self.render_char`` and the value it returns is added to
        y.  ``charspace`` is added before a cid once anything has preceded
        it, and ``wordspace`` after a cid 32 when it is non-zero.

        ``pos`` is the starting ``(x, y)`` pair.  It is a plain parameter
        unpacked in the body because tuple parameters are Python-2-only
        syntax (PEP 3113); callers still pass the pair positionally.

        Returns:
            ``(x, y)`` with x untouched and y advanced.
        """
        (x, y) = pos
        needcharspace = False
        for obj in seq:
            if isinstance(obj, (int, float)):
                y -= obj * dxscale
                needcharspace = True
            else:
                for cid in font.decode(obj):
                    if needcharspace:
                        y += charspace
                    y += self.render_char(translate_matrix(matrix, (x, y)),
                                          font, fontsize, scaling, rise, cid)
                    if cid == 32 and wordspace:
                        y += wordspace
                    needcharspace = True
        return (x, y)

    def render_char(self, matrix, font, fontsize, scaling, rise, cid):
        """Base implementation of the single-character hook.

        Ignores every argument and returns 0; the value is what
        ``render_string_vertical`` adds to its coordinate for this cid.
        """
        no_advance = 0
        return no_advance


##  TagExtractor
##
class TagExtractor(PDFDevice):
    # Device class derived from PDFDevice.
    # NOTE(review): the excerpt stops after the first constructor statement.
    def __init__(self, rsrcmgr, outfp, codec='utf-8', debug=0):
        # Only the base-class initialization with rsrcmgr is visible; how
        # outfp, codec and debug are used cannot be told from these lines.
        PDFDevice.__init__(self, rsrcmgr)
Example #10
0
                        char = self.handle_undefined_char(cidcoding, cid)
                    chars.append((char, cid))
                    if cid == 32 and wordspace:
                        if needspace:
                            x += charspace
                        (dx, dy) = self.render_chars(
                            translate_matrix(matrix, (x, y)), font, fontsize,
                            charspace, scaling, chars)
                        needspace = True
                        x += dx + wordspace
                        y += dy
                        chars = []
        if chars:
            if needspace:
                x += charspace
            (dx, dy) = self.render_chars(translate_matrix(matrix,
                                                          (x, y)), font,
                                         fontsize, charspace, scaling, chars)
            x += dx
            y += dy
        return (x, y)

    def render_string_vertical(self, seq, matrix, (x, y), font, fontsize,
                               scaling, charspace, wordspace, dxscale):
        """Vertical counterpart of the run-based string renderer.

        NOTE(review): this excerpt is cut off inside the numeric branch -
        only the flush through ``render_chars`` and the x update are visible.
        The remainder of the method is not shown here.
        """
        chars = []
        needspace = False
        for obj in seq:
            if isinstance(obj, int) or isinstance(obj, float):
                # Numeric item: flush the pending characters at the current
                # position and take the advance that render_chars reports.
                (dx, dy) = self.render_chars(translate_matrix(matrix, (x, y)),
                                             font, fontsize, charspace,
                                             scaling, chars)
                x += dx
Example #11
0
                    if cid == 32 and wordspace:
                        x += wordspace
                    needcharspace = True
        return x, y

    def render_string_vertical(self, seq, matrix, pos, font, fontsize, scaling, charspace, wordspace, rise, dxscale):
        """Render ``seq`` along the y axis and return the resulting ``x, y``.

        ``pos`` is the starting ``(x, y)`` pair; it is unpacked in the body
        instead of in the signature because tuple parameters were removed
        from the language by PEP 3113.  Call sites are unaffected: the pair
        is still passed as one positional argument.

        Numeric items subtract ``obj * dxscale`` from y.  Other items are
        decoded with ``font.decode`` and each cid is given to
        ``self.render_char``, whose return value is added to y; ``charspace``
        precedes a cid once anything has come before it and ``wordspace``
        follows a cid 32 when non-zero.
        """
        x, y = pos
        needcharspace = False
        for obj in seq:
            if isinstance(obj, (int, float)):
                y -= obj * dxscale
                needcharspace = True
            else:
                for cid in font.decode(obj):
                    if needcharspace:
                        y += charspace
                    y += self.render_char(translate_matrix(matrix, (x, y)), font, fontsize, scaling, rise, cid)
                    if cid == 32 and wordspace:
                        y += wordspace
                    needcharspace = True
        return x, y

    def render_char(self, matrix, font, fontsize, scaling, rise, cid):
        """Hook called once per character id; this version does nothing.

        Returns 0, which ``render_string_vertical`` adds to its running y
        coordinate (i.e. no advance).
        """
        result = 0
        return result


class TagExtractor(PDFDevice):
    # PDFDevice subclass that keeps an output file object and a codec name.
    # NOTE(review): the class probably continues past this excerpt; only the
    # start of the constructor is visible.

    def __init__(self, rsrcmgr, outfp, codec='utf-8'):
        # rsrcmgr goes to the PDFDevice base initializer; outfp and codec
        # (default 'utf-8') are stored on the instance unchanged.
        PDFDevice.__init__(self, rsrcmgr)
        self.outfp = outfp
        self.codec = codec
Example #12
0
                        (cidcoding, cid) = e.args
                        char = self.handle_undefined_char(cidcoding, cid)
                    chars.append((char, cid))
                    if cid == 32 and wordspace:
                        if needspace:
                            x += charspace
                        (dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
                                                    fontsize, charspace, scaling, chars)
                        needspace = True
                        x += dx + wordspace
                        y += dy
                        chars = []
        if chars:
            if needspace:
                x += charspace
            (dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
                                        fontsize, charspace, scaling, chars)
            x += dx
            y += dy
        return (x, y)

    def render_string_vertical(self, seq, matrix, (x,y), 
                               font, fontsize, scaling, charspace, wordspace, dxscale):
        chars = []
        needspace = False
        for obj in seq:
            if isinstance(obj, int) or isinstance(obj, float):
                (dx,dy) = self.render_chars(translate_matrix(matrix, (x,y)), font,
                                            fontsize, charspace, scaling, chars)
                x += dx
                y += dy - obj*dxscale