Example #1
0
 def get_colorspace(spec):
     """Return the color space described by ``spec``.

     ``spec`` is either a bare name or a list whose first element is the
     name.  A list-form ICCBased or DeviceN spec with at least two elements
     yields a new PDFColorSpace; everything else is looked up in
     PREDEFINED_COLORSPACE, giving None when the name is not present.
     """
     is_array = isinstance(spec, list)
     head = spec
     if is_array:
         head = spec[0]
     name = literal_name(head)
     if is_array and len(spec) >= 2:
         if name == 'ICCBased':
             # Component count is read from the 'N' entry of the stream.
             return PDFColorSpace(name, stream_value(spec[1])['N'])
         if name == 'DeviceN':
             # Component count is the length of the second element.
             return PDFColorSpace(name, len(list_value(spec[1])))
     return PREDEFINED_COLORSPACE.get(name)
Example #2
0
 def get_colorspace(spec):
     """Return the color space described by ``spec``.

     ``spec`` is either a bare name or a list whose first element is the
     name.  A list-form ICCBased or DeviceN spec with at least two elements
     yields a new ColorSpace; everything else is looked up in
     PREDEFINED_COLORSPACE, which raises KeyError for an unknown name.
     """
     is_array = isinstance(spec, list)
     head = spec
     if is_array:
         head = spec[0]
     name = literal_name(head)
     if is_array and len(spec) >= 2:
         if name == "ICCBased":
             # Component count is read from the "N" entry of the stream dict.
             return ColorSpace(name, stream_value(spec[1]).dic["N"])
         if name == "DeviceN":
             # Component count is the length of the second element.
             return ColorSpace(name, len(list_value(spec[1])))
     return PREDEFINED_COLORSPACE[name]
Example #3
0
 def get_colorspace(spec):
     """Return the color space described by ``spec``.

     ``spec`` is either a bare name or a list whose first element is the
     name.  A list-form ICCBased or DeviceN spec with at least two elements
     yields a new PDFColorSpace; everything else is looked up in
     PREDEFINED_COLORSPACE, which raises KeyError for an unknown name.
     """
     is_array = isinstance(spec, list)
     head = spec
     if is_array:
         head = spec[0]
     name = literal_name(head)
     if is_array and len(spec) >= 2:
         if name == 'ICCBased':
             # Component count is read from the 'N' entry of the stream.
             return PDFColorSpace(name, stream_value(spec[1])['N'])
         if name == 'DeviceN':
             # Component count is the length of the second element.
             return PDFColorSpace(name, len(list_value(spec[1])))
     return PREDEFINED_COLORSPACE[name]
Example #4
0
 def do_Do(self, xobjid):
     """Render the XObject registered under ``xobjid``.

     A form XObject with a BBox is rendered by a new PDFPageInterpreter
     inside a begin_figure/end_figure pair whose bounding box is the BBox
     transformed by the form matrix combined with the current CTM.  An
     image XObject with Width and Height is handed to the device's
     render_image inside a figure spanning the transformed unit square.
     Other subtypes are ignored.

     Raises PDFInterpreterError for an unknown id when STRICT is set;
     otherwise an unknown id is skipped.  A TypeError raised while
     handling the object is swallowed.
     """
     xobjid = literal_name(xobjid)
     try:
         xobj = stream_value(self.xobjmap[xobjid])
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError("Undefined xobject id: %r" % xobjid)
     if 1 <= self.debug:
         print >> stderr, "Processing xobj: %r" % xobj
     subtype = xobj.dic.get("Subtype")
     try:
         if subtype == LITERAL_FORM and "BBox" in xobj.dic:
             child = PDFPageInterpreter(self.rsrc, self.device)
             (llx, lly, urx, ury) = xobj.dic["BBox"]
             form_matrix = xobj.dic.get("Matrix", MATRIX_IDENTITY)
             ctm = mult_matrix(form_matrix, self.ctm)
             (llx, lly) = apply_matrix(ctm, (llx, lly))
             (urx, ury) = apply_matrix(ctm, (urx, ury))
             self.device.begin_figure(xobjid, (llx, lly, urx, ury))
             child.render_contents(xobj.dic.get("Resources"), [xobj], ctm=ctm)
             self.device.end_figure(xobjid)
         elif subtype == LITERAL_IMAGE and "Width" in xobj.dic and "Height" in xobj.dic:
             (llx, lly) = apply_matrix(self.ctm, (0, 0))
             (urx, ury) = apply_matrix(self.ctm, (1, 1))
             self.device.begin_figure(xobjid, (llx, lly, urx, ury))
             size = (xobj.dic["Width"], xobj.dic["Height"])
             self.device.render_image(xobj, size, self.ctm)
             self.device.end_figure(xobjid)
         # Any other xobject type is unsupported and ignored.
     except TypeError:
         # NOTE(review): a TypeError raised after begin_figure() skips the
         # matching end_figure() call -- confirm this is intended.
         pass
Example #5
0
 def do_Do(self, xobjid):
     """Render the XObject registered under ``xobjid``.

     A form XObject with a BBox is rendered by the interpreter returned
     from self.dup(), between begin_figure/end_figure calls on the device.
     An image XObject with Width and Height is passed to the device's
     render_image inside a unit-square figure.  Other subtypes are ignored.

     Raises PDFInterpreterError for an unknown id when STRICT is set;
     otherwise an unknown id is skipped.
     """
     xobjid = literal_name(xobjid)
     try:
         xobj = stream_value(self.xobjmap[xobjid])
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
     if 1 <= self.debug:
         print >>sys.stderr, 'Processing xobj: %r' % xobj
     subtype = xobj.get('Subtype')
     if subtype is LITERAL_FORM and 'BBox' in xobj:
         child = self.dup()
         bbox = list_value(xobj['BBox'])
         matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
         # According to PDF reference 1.7 section 4.9.1, XObjects in
         # earlier PDFs (prior to v1.2) use the page's Resources entry
         # instead of having their own Resources entry.
         resources = dict_value(xobj.get('Resources'))
         if not resources:
             resources = self.resources.copy()
         self.device.begin_figure(xobjid, bbox, matrix)
         child.render_contents(resources, [xobj],
                               ctm=mult_matrix(matrix, self.ctm))
         self.device.end_figure(xobjid)
         return
     if subtype is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj:
         self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
         self.device.render_image(xobjid, xobj)
         self.device.end_figure(xobjid)
     # Any other xobject type is unsupported and ignored.
 def do_cs(self, name):
     """Store the color space registered under ``name`` in self.ncs.

     The lookup goes through self.csmap.  An unknown name raises
     PDFInterpreterError when STRICT is set and is otherwise ignored,
     leaving self.ncs untouched.
     """
     try:
         colorspace = self.csmap[literal_name(name)]
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
     self.ncs = colorspace
Example #7
0
 def do_keyword(self, pos, token):
     """Handle a keyword token; only KEYWORD_PUT has an effect.

     On KEYWORD_PUT the two topmost (position, object) pairs are popped.
     When they hold an int followed by a PSLiteral, the pair
     (int, literal name) is recorded through add_results.
     """
     if token is not self.KEYWORD_PUT:
         return
     (key_entry, value_entry) = self.pop(2)
     (_, key) = key_entry
     (_, value) = value_entry
     if not isinstance(key, int):
         return
     if isinstance(value, PSLiteral):
         self.add_results((key, literal_name(value)))
 def do_Do(self, xobjid):
     """Render the XObject registered under ``xobjid``.

     A form XObject with a BBox is rendered by the interpreter returned
     from self.dup(), between begin_figure/end_figure calls on the device.
     An image XObject with Width and Height is passed to the device's
     render_image inside a unit-square figure.  Other subtypes are ignored.

     Raises PDFInterpreterError for an unknown id when STRICT is set;
     otherwise an unknown id is skipped.
     """
     xobjid = literal_name(xobjid)
     try:
         xobj = stream_value(self.xobjmap[xobjid])
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
     if 1 <= self.debug:
         print >>sys.stderr, 'Processing xobj: %r' % xobj
     subtype = xobj.get('Subtype')
     if subtype is LITERAL_FORM and 'BBox' in xobj:
         child = self.dup()
         bbox = list_value(xobj['BBox'])
         matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
         # According to PDF reference 1.7 section 4.9.1, XObjects in
         # earlier PDFs (prior to v1.2) use the page's Resources entry
         # instead of having their own Resources entry.
         resources = dict_value(xobj.get('Resources'))
         if not resources:
             resources = self.resources.copy()
         self.device.begin_figure(xobjid, bbox, matrix)
         child.render_contents(resources, [xobj],
                               ctm=mult_matrix(matrix, self.ctm))
         self.device.end_figure(xobjid)
         return
     if subtype is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj:
         self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
         self.device.render_image(xobjid, xobj)
         self.device.end_figure(xobjid)
     # Any other xobject type is unsupported and ignored.
Example #9
0
 def do_Do(self, xobjid):
     """Render the XObject registered under ``xobjid``.

     A form XObject with a BBox is rendered by the interpreter returned
     from self.dup(), using the form's own Resources entry, between
     begin_figure/end_figure calls on the device.  An image XObject with
     Width and Height is passed to the device's render_image inside a
     unit-square figure.  Other subtypes are ignored.

     Raises PDFInterpreterError for an unknown id when STRICT is set;
     otherwise an unknown id is skipped.
     """
     xobjid = literal_name(xobjid)
     try:
         xobj = stream_value(self.xobjmap[xobjid])
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
     if 1 <= self.debug:
         print >>stderr, 'Processing xobj: %r' % xobj
     subtype = xobj.get('Subtype')
     if subtype is LITERAL_FORM and 'BBox' in xobj:
         child = self.dup()
         bbox = list_value(xobj['BBox'])
         matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
         self.device.begin_figure(xobjid, bbox, matrix)
         resources = dict_value(xobj.get('Resources'))
         child.render_contents(resources, [xobj],
                               ctm=mult_matrix(matrix, self.ctm))
         self.device.end_figure(xobjid)
         return
     if subtype is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj:
         self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
         self.device.render_image(xobjid, xobj)
         self.device.end_figure(xobjid)
     # Any other xobject type is unsupported and ignored.
Example #10
0
 def do_cs(self, name):
     """Store the color space registered under ``name`` in self.ncs.

     The lookup goes through self.csmap.  An unknown name raises
     PDFInterpreterError when STRICT is set and is otherwise ignored,
     leaving self.ncs untouched.
     """
     try:
         colorspace = self.csmap[literal_name(name)]
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError('Undefined ColorSpace: %r' % name)
     self.ncs = colorspace
Example #11
0
 def do_Do(self, xobjid):
     """Render the XObject registered under ``xobjid``.

     A form XObject with a BBox is rendered by the interpreter returned
     from self.dup(), inside a figure whose bounding box is the BBox
     transformed by the form matrix combined with the current CTM.  An
     image XObject with Width and Height is handed to the device's
     render_image inside a figure spanning the transformed unit square.
     Other subtypes are ignored.

     Raises PDFInterpreterError for an unknown id when STRICT is set;
     otherwise an unknown id is skipped.
     """
     xobjid = literal_name(xobjid)
     try:
         xobj = stream_value(self.xobjmap[xobjid])
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
     if 1 <= self.debug:
         print >> stderr, 'Processing xobj: %r' % xobj
     subtype = xobj.dic.get('Subtype')
     if subtype is LITERAL_FORM and 'BBox' in xobj.dic:
         child = self.dup()
         (llx, lly, urx, ury) = list_value(xobj.dic['BBox'])
         form_matrix = list_value(xobj.dic.get('Matrix', MATRIX_IDENTITY))
         ctm = mult_matrix(form_matrix, self.ctm)
         (llx, lly) = apply_matrix(ctm, (llx, lly))
         (urx, ury) = apply_matrix(ctm, (urx, ury))
         self.device.begin_figure(xobjid, (llx, lly, urx, ury))
         resources = dict_value(xobj.dic.get('Resources'))
         child.render_contents(resources, [xobj], ctm=ctm)
         self.device.end_figure(xobjid)
         return
     if subtype is LITERAL_IMAGE and 'Width' in xobj.dic and 'Height' in xobj.dic:
         (llx, lly) = apply_matrix(self.ctm, (0, 0))
         (urx, ury) = apply_matrix(self.ctm, (1, 1))
         self.device.begin_figure(xobjid, (llx, lly, urx, ury))
         size = (xobj.dic['Width'], xobj.dic['Height'])
         self.device.render_image(xobj, size, self.ctm)
         self.device.end_figure(xobjid)
     # Any other xobject type is unsupported and ignored.
Example #12
0
 def do_Do(self, xobjid):
     """Render the XObject registered under ``xobjid``.

     A form XObject with a BBox is rendered by the interpreter returned
     from self.dup(), using the form's own Resources entry, between
     begin_figure/end_figure calls on the device.  An image XObject with
     Width and Height is passed to the device's render_image inside a
     unit-square figure.  Other subtypes are ignored.

     Raises PDFInterpreterError for an unknown id when STRICT is set;
     otherwise an unknown id is skipped.
     """
     xobjid = literal_name(xobjid)
     try:
         xobj = stream_value(self.xobjmap[xobjid])
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
     if 1 <= self.debug:
         print >>stderr, 'Processing xobj: %r' % xobj
     subtype = xobj.get('Subtype')
     if subtype is LITERAL_FORM and 'BBox' in xobj:
         child = self.dup()
         bbox = list_value(xobj['BBox'])
         matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
         self.device.begin_figure(xobjid, bbox, matrix)
         resources = dict_value(xobj.get('Resources'))
         child.render_contents(resources, [xobj],
                               ctm=mult_matrix(matrix, self.ctm))
         self.device.end_figure(xobjid)
         return
     if subtype is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj:
         self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
         self.device.render_image(xobjid, xobj)
         self.device.end_figure(xobjid)
     # Any other xobject type is unsupported and ignored.
Example #13
0
 def do_Tf(self, fontid, fontsize):
     """Select the font registered under ``fontid`` and set the font size.

     An unknown font id is reported through handle_error; if that call
     returns, a Times-Roman PDFType1Font is used in its place.
     """
     try:
         font = self.fontmap[literal_name(fontid)]
     except KeyError:
         handle_error(PDFInterpreterError, 'Undefined Font id: %r' % fontid)
         font = PDFType1Font(self.rsrcmgr, {'BaseFont': 'Times-Roman'})
     self.textstate.font = font
     self.textstate.fontsize = fontsize
Example #14
0
 def decrypt(self, objid, genno, data, attrs=None, name=None):
     """Decrypt ``data`` with the crypt filter method selected by ``name``.

     When metadata encryption is disabled and ``attrs`` has a Type of
     Metadata, ``data`` is returned unchanged.  ``name`` defaults to
     self.strf and selects the callable in self.cfm that is invoked with
     (objid, genno, data).
     """
     if attrs is not None and not self.encrypt_metadata:
         kind = attrs.get('Type')
         if kind is not None and literal_name(kind) == 'Metadata':
             return data
     filter_name = name
     if filter_name is None:
         filter_name = self.strf
     method = self.cfm[filter_name]
     return method(objid, genno, data)
Example #15
0
    def initialize(self, password=''):
        """Perform the initialization with a given password.

        This step is mandatory even if there's no password associated with the document.

        Without encryption information every permission flag is set to True
        and nothing else happens.  Otherwise the permission flags are taken
        from the P entry, the key is derived from the password (Algorithm
        3.2) and checked against the U entry (Algorithm 3.4 for revision 2,
        3.5 for revision 3).  On success the key is stored in
        self.decrypt_key and self.decipher is set to self.decrypt_rc4.

        Raises:
            PDFEncryptionError: unknown filter, algorithm (V) or revision (R).
            PDFNotImplementedError: revision 4.
            PDFPasswordIncorrect: the password does not match the U entry.
        """
        if not self.encryption:
            self.is_printable = self.is_modifiable = self.is_extractable = True
            return
        (docid, param) = self.encryption
        if literal_name(param.get('Filter')) != 'Standard':
            raise PDFEncryptionError('Unknown filter: param=%r' % param)
        V = int_value(param.get('V', 0))
        if not (V == 1 or V == 2):
            raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
        length = int_value(param.get('Length', 40))  # Key length (bits)
        O = str_value(param['O'])
        R = int_value(param['R'])  # Revision
        if 5 <= R:
            raise PDFEncryptionError('Unknown revision: %r' % R)
        U = str_value(param['U'])
        P = int_value(param['P'])
        self.is_printable = bool(P & 4)
        self.is_modifiable = bool(P & 8)
        self.is_extractable = bool(P & 16)
        # Algorithm 3.2
        password = (password + self.PASSWORD_PADDING)[:32]  # 1
        md5hash = md5.md5(password)  # 2
        md5hash.update(O)  # 3
        md5hash.update(struct.pack('<l', P))  # 4
        md5hash.update(docid[0])  # 5
        if 4 <= R:
            # 6
            raise PDFNotImplementedError('Revision 4 encryption is currently unsupported')
        keylen = length // 8  # key length in bytes
        if 3 <= R:
            # 8
            for _ in xrange(50):
                md5hash = md5.md5(md5hash.digest()[:keylen])
        key = md5hash.digest()[:keylen]
        if R == 2:
            # Algorithm 3.4
            u1 = Arcfour(key).process(self.PASSWORD_PADDING)
            is_authenticated = (u1 == U)
        elif R == 3:
            # Algorithm 3.5
            md5hash = md5.md5(self.PASSWORD_PADDING)  # 2
            md5hash.update(docid[0])  # 3
            x = Arcfour(key).process(md5hash.digest()[:16])  # 4
            for i in xrange(1, 19 + 1):
                k = ''.join(chr(ord(c) ^ i) for c in key)
                x = Arcfour(k).process(x)
            u1 = x + x  # 32bytes total
            is_authenticated = (u1[:16] == U[:16])
        else:
            # R below 2 has no verification algorithm here; report it as an
            # unknown revision instead of crashing on an unassigned local.
            raise PDFEncryptionError('Unknown revision: %r' % R)
        if not is_authenticated:
            raise PDFPasswordIncorrect
        self.decrypt_key = key
        self.decipher = self.decrypt_rc4  # XXX may be AES
Example #16
0
 def initialize(self, password=''):
     """Authenticate against the document's encryption with ``password``.

     Without encryption information every permission flag is set to True.
     Otherwise the permission flags are taken from the P entry, the key is
     derived from the password (Algorithm 3.2) and checked against the U
     entry (Algorithm 3.4 for revision 2, 3.5 for revision 3).  On success
     the key is stored in self.decrypt_key and self.decipher is set to
     self.decrypt_rc4.  self._initialized is set to True on both
     successful paths.

     Raises:
         PDFEncryptionError: unknown filter, algorithm (V) or revision (R).
         PDFNotImplementedError: revision 4.
         PDFPasswordIncorrect: the password does not match the U entry.
     """
     if not self.encryption:
         self.is_printable = self.is_modifiable = self.is_extractable = True
         self._initialized = True
         return
     (docid, param) = self.encryption
     if literal_name(param['Filter']) != 'Standard':
         raise PDFEncryptionError('Unknown filter: param=%r' % param)
     V = int_value(param.get('V', 0))
     if not (V == 1 or V == 2):
         raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
     length = int_value(param.get('Length', 40))  # Key length (bits)
     O = str_value(param['O'])
     R = int_value(param['R'])  # Revision
     if 5 <= R:
         raise PDFEncryptionError('Unknown revision: %r' % R)
     U = str_value(param['U'])
     P = int_value(param['P'])
     self.is_printable = bool(P & 4)
     self.is_modifiable = bool(P & 8)
     self.is_extractable = bool(P & 16)
     # Algorithm 3.2
     password = (password + self.PASSWORD_PADDING)[:32]  # 1
     md5hash = md5.md5(password)  # 2
     md5hash.update(O)  # 3
     md5hash.update(struct.pack('<l', P))  # 4
     md5hash.update(docid[0])  # 5
     if 4 <= R:
         # 6
         raise PDFNotImplementedError(
             'Revision 4 encryption is currently unsupported')
     keylen = length // 8  # key length in bytes
     if 3 <= R:
         # 8
         for _ in xrange(50):
             md5hash = md5.md5(md5hash.digest()[:keylen])
     key = md5hash.digest()[:keylen]
     if R == 2:
         # Algorithm 3.4
         u1 = Arcfour(key).process(self.PASSWORD_PADDING)
         is_authenticated = (u1 == U)
     elif R == 3:
         # Algorithm 3.5
         md5hash = md5.md5(self.PASSWORD_PADDING)  # 2
         md5hash.update(docid[0])  # 3
         x = Arcfour(key).process(md5hash.digest()[:16])  # 4
         for i in xrange(1, 19 + 1):
             k = ''.join(chr(ord(c) ^ i) for c in key)
             x = Arcfour(k).process(x)
         u1 = x + x  # 32bytes total
         is_authenticated = (u1[:16] == U[:16])
     else:
         # R below 2 has no verification algorithm here; report it as an
         # unknown revision instead of crashing on an unassigned local.
         raise PDFEncryptionError('Unknown revision: %r' % R)
     if not is_authenticated:
         raise PDFPasswordIncorrect
     self.decrypt_key = key
     self.decipher = self.decrypt_rc4  # XXX may be AES
     self._initialized = True
     return
Example #17
0
 def do_Tf(self, fontid, fontsize):
     """Select the font registered under ``fontid`` and set the font size.

     An unknown font id raises PDFInterpreterError when STRICT is set;
     otherwise the call returns without changing the text state.
     """
     try:
         font = self.fontmap[literal_name(fontid)]
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError('Undefined Font id: %r' % fontid)
     self.textstate.font = font
     self.textstate.fontsize = fontsize
Example #18
0
 def do_Tf(self, fontid, fontsize):
     """Select the font registered under ``fontid`` and set the font size.

     An unknown font id raises PDFInterpreterError when STRICT is set;
     otherwise the font returned by self.rsrcmgr.get_font(None, {}) is
     used in its place.
     """
     try:
         font = self.fontmap[literal_name(fontid)]
     except KeyError:
         if STRICT:
             raise PDFInterpreterError("Undefined Font id: %r" % fontid)
         font = self.rsrcmgr.get_font(None, {})
     self.textstate.font = font
     self.textstate.fontsize = fontsize
Example #19
0
 def do_Tf(self, fontid, fontsize):
     """Select the font registered under ``fontid`` and set the font size.

     An unknown font id raises PDFInterpreterError when STRICT is set;
     otherwise the font returned by self.rsrcmgr.get_font(None, {}) is
     used in its place.
     """
     try:
         font = self.fontmap[literal_name(fontid)]
     except KeyError:
         if STRICT:
             raise PDFInterpreterError('Undefined Font id: %r' % fontid)
         font = self.rsrcmgr.get_font(None, {})
     self.textstate.font = font
     self.textstate.fontsize = fontsize
Example #20
0
 def init_params(self):
     """Read the crypt filter settings from self.param.

     After the parent class's init_params, this sets length to 128 and
     reads CF, StmF, StrF and EncryptMetadata.  It then builds self.cfm,
     mapping each crypt filter name to the callable returned by get_cfm,
     plus an 'Identity' entry bound to self.decrypt_identity.

     Raises PDFEncryptionError when StmF and StrF differ, when a filter's
     CFM has no method, or when StrF names no entry in self.cfm.
     """
     super(PDFStandardSecurityHandlerV4, self).init_params()
     param = self.param
     self.length = 128
     self.cf = dict_value(param.get('CF'))
     self.stmf = literal_name(param['StmF'])
     self.strf = literal_name(param['StrF'])
     self.encrypt_metadata = bool(param.get('EncryptMetadata', True))
     if self.stmf != self.strf:
         raise PDFEncryptionError('Unsupported crypt filter: param=%r' % self.param)
     methods = {}
     self.cfm = methods
     for (filter_name, filter_spec) in self.cf.items():
         method = self.get_cfm(literal_name(filter_spec['CFM']))
         if method is None:
             raise PDFEncryptionError('Unknown crypt filter method: param=%r' % self.param)
         methods[filter_name] = method
     methods['Identity'] = self.decrypt_identity
     if self.strf not in methods:
         raise PDFEncryptionError('Undefined crypt filter: param=%r' % self.param)
Example #21
0
 def do_Tf(self, fontid, fontsize):
     """Select the font registered under ``fontid`` and set the font size.

     An unknown font id raises PDFInterpreterError when STRICT is set;
     otherwise a Times-Roman PDFType1Font is used in its place.
     """
     try:
         font = self.fontmap[literal_name(fontid)]
     except KeyError:
         if STRICT:
             raise PDFInterpreterError('Undefined Font id: %r' % fontid)
         font = PDFType1Font(self.rsrcmgr, {'BaseFont':'Times-Roman'})
     self.textstate.font = font
     self.textstate.fontsize = fontsize
Example #22
0
 def do_Tf(self, fontid, fontsize):
     """Select the font registered under ``fontid`` and set the font size.

     An unknown font id raises PDFInterpreterError when STRICT is set;
     otherwise the call returns without changing the text state.
     """
     try:
         self.textstate.font = self.fontmap[literal_name(fontid)]
     except KeyError:
         # Report the missing font through the STRICT policy rather than
         # leaking the raw KeyError from the fontmap lookup.
         if STRICT:
             raise PDFInterpreterError('Undefined Font id: %r' % fontid)
         return
     self.textstate.fontsize = fontsize
     return
Example #23
0
 def __init__(self, rsrcmgr, spec):
     try:
         self.basefont = literal_name(spec['BaseFont'])
     except KeyError:
         handle_error(PDFFontError, 'BaseFont is missing')
         self.basefont = 'unknown'
     self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
     self.cidcoding = '%s-%s' % (self.cidsysteminfo.get('Registry', 'unknown'),
                                 self.cidsysteminfo.get('Ordering', 'unknown'))
     try:
         name = literal_name(spec['Encoding'])
     except KeyError:
         handle_error(PDFFontError, 'Encoding is unspecified')
         name = 'unknown'
     try:
         self.cmap = CMapDB.get_cmap(name)
     except CMapDB.CMapNotFound, e:
         handle_error(PDFFontError, str(e))
         self.cmap = CMap()
Example #24
0
 def __init__(self, descriptor, widths, spec):
     """Set up the encoding tables, then run PDFFont.__init__.

     The Encoding entry of ``spec`` is either the name of a built-in
     encoding or a dictionary describing differences from a base
     encoding; the standard encoding is used when it is absent.  A
     ToUnicode stream, if present, is parsed into self.unicode_map.
     """
     encoding = LITERAL_STANDARD_ENCODING
     if 'Encoding' in spec:
         encoding = resolve1(spec['Encoding'])
     if not isinstance(encoding, dict):
         self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding))
     else:
         base = literal_name(encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING))
         differences = list_value(encoding.get('Differences', None))
         self.cid2unicode = EncodingDB.get_encoding(base, differences)
     self.unicode_map = None
     if 'ToUnicode' in spec:
         tounicode = stream_value(spec['ToUnicode'])
         umap = FileUnicodeMap()
         self.unicode_map = umap
         CMapParser(umap, StringIO(tounicode.get_data())).run()
     PDFFont.__init__(self, descriptor, widths)
Example #25
0
 def __init__(self, descriptor, widths, default_width=None):
   """Store the font descriptor, the widths and the derived metrics.

   fontname comes from the FontName entry ('unknown' when absent) and is
   converted to a plain name when it is a PSLiteral.  ascent, descent and
   leading default to 0 and bbox to (0, 0, 0, 0).  default_width falls
   back to the descriptor's MissingWidth entry when the argument is
   falsy.
   """
   self.descriptor = descriptor
   self.widths = widths
   self.fontname = descriptor.get('FontName', 'unknown')
   if isinstance(self.fontname, PSLiteral):
     self.fontname = literal_name(self.fontname)
   self.ascent = num_value(descriptor.get('Ascent', 0))
   self.descent = num_value(descriptor.get('Descent', 0))
   # MissingWidth goes through num_value like the other numeric entries.
   self.default_width = default_width or num_value(descriptor.get('MissingWidth', 0))
   self.leading = num_value(descriptor.get('Leading', 0))
   self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0)))
   return
Example #26
0
 def __init__(self, descriptor, widths, spec):
     """Set up the encoding tables, then run PDFFont.__init__.

     The Encoding entry of ``spec`` is either the name of a built-in
     encoding or a dictionary describing differences from a base
     encoding; the standard encoding is used when it is absent.  A
     ToUnicode stream, if present, is parsed into self.unicode_map.
     """
     encoding = LITERAL_STANDARD_ENCODING
     if 'Encoding' in spec:
         encoding = resolve1(spec['Encoding'])
     if not isinstance(encoding, dict):
         self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding))
     else:
         base = literal_name(encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING))
         differences = list_value(encoding.get('Differences', None))
         self.cid2unicode = EncodingDB.get_encoding(base, differences)
     self.unicode_map = None
     if 'ToUnicode' in spec:
         tounicode = stream_value(spec['ToUnicode'])
         umap = FileUnicodeMap()
         self.unicode_map = umap
         CMapParser(umap, StringIO(tounicode.get_data())).run()
     PDFFont.__init__(self, descriptor, widths)
Example #27
0
 def __init__(self, descriptor, widths, spec):
     """Set up the encoding tables, then run PDFFont.__init__.

     The Encoding entry of ``spec`` is either the name of a built-in
     encoding or a dictionary describing differences from a base
     encoding; the standard encoding is used when it is absent.  A
     ToUnicode stream, if present, is parsed into self.ucs2_cmap.
     """
     encoding = LITERAL_STANDARD_ENCODING
     if "Encoding" in spec:
         encoding = resolve1(spec["Encoding"])
     if not isinstance(encoding, dict):
         self.encoding = EncodingDB.get_encoding(literal_name(encoding))
     else:
         base = literal_name(encoding.get("BaseEncoding", LITERAL_STANDARD_ENCODING))
         differences = list_value(encoding.get("Differences", None))
         self.encoding = EncodingDB.get_encoding(base, differences)
     self.ucs2_cmap = None
     if "ToUnicode" in spec:
         tounicode = stream_value(spec["ToUnicode"])
         cmap = CMap()
         self.ucs2_cmap = cmap
         CMapParser(cmap, StringIO(tounicode.get_data())).run()
     PDFFont.__init__(self, descriptor, widths)
Example #28
0
 def __init__(self, rsrc, spec):
     try:
         self.basefont = literal_name(spec['BaseFont'])
     except KeyError:
         if STRICT:
             raise PDFFontError('BaseFont is missing')
         self.basefont = 'unknown'
     self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
     self.cidcoding = '%s-%s' % (self.cidsysteminfo.get(
         'Registry',
         'unknown'), self.cidsysteminfo.get('Ordering', 'unknown'))
     try:
         name = literal_name(spec['Encoding'])
     except KeyError:
         if STRICT:
             raise PDFFontError('Encoding is unspecified')
         name = 'unknown'
     try:
         self.cmap = rsrc.get_cmap(name, strict=STRICT)
     except CMapDB.CMapNotFound, e:
         raise PDFFontError(e)
Example #29
0
 def __init__(self, descriptor, widths, default_width=None):
     """Store the font descriptor, the widths and the derived metrics.

     fontname comes from the FontName entry ('unknown' when absent) and
     is converted to a plain name when it is a PSLiteral.  ascent,
     descent and leading default to 0 and bbox to (0, 0, 0, 0).
     default_width falls back to the descriptor's MissingWidth entry when
     the argument is falsy.
     """
     self.descriptor = descriptor
     self.widths = widths
     self.fontname = descriptor.get('FontName', 'unknown')
     if isinstance(self.fontname, PSLiteral):
         self.fontname = literal_name(self.fontname)
     self.ascent = num_value(descriptor.get('Ascent', 0))
     self.descent = num_value(descriptor.get('Descent', 0))
     # MissingWidth goes through num_value like the other numeric entries.
     self.default_width = default_width or num_value(descriptor.get('MissingWidth', 0))
     self.leading = num_value(descriptor.get('Leading', 0))
     self.bbox = list_value(descriptor.get('FontBBox', (0, 0, 0, 0)))
     return
Example #30
0
 def __init__(self, descriptor, widths, default_width=None):
     """Store the font descriptor, the widths and the derived metrics.

     fontname comes from the FontName entry ("unknown" when absent) and
     is converted to a plain name when it is a PSLiteral.  ascent,
     descent and leading default to 0 and bbox to (0, 0, 0, 0).
     default_width falls back to the descriptor's MissingWidth entry when
     the argument is falsy.
     """
     self.descriptor = descriptor
     self.widths = widths
     self.fontname = descriptor.get("FontName", "unknown")
     if isinstance(self.fontname, PSLiteral):
         self.fontname = literal_name(self.fontname)
     self.ascent = num_value(descriptor.get("Ascent", 0))
     self.descent = num_value(descriptor.get("Descent", 0))
     # MissingWidth goes through num_value like the other numeric entries.
     self.default_width = default_width or num_value(descriptor.get("MissingWidth", 0))
     self.leading = num_value(descriptor.get("Leading", 0))
     self.bbox = list_value(descriptor.get("FontBBox", (0, 0, 0, 0)))
     return
Example #31
0
 def __init__(self, spec):
     try:
         self.basefont = literal_name(spec["BaseFont"])
     except KeyError:
         if STRICT:
             raise PDFFontError("BaseFont is missing")
         self.basefont = "unknown"
     self.cidsysteminfo = dict_value(spec.get("CIDSystemInfo", {}))
     self.cidcoding = "%s-%s" % (
         self.cidsysteminfo.get("Registry", "unknown"),
         self.cidsysteminfo.get("Ordering", "unknown"),
     )
     try:
         name = literal_name(spec["Encoding"])
     except KeyError:
         if STRICT:
             raise PDFFontError("Encoding not specified")
         name = "unknown"
     try:
         self.cmap = CMapDB.get_cmap(name, strict=STRICT)
     except CMapDB.CMapNotFound, e:
         raise PDFFontError(e)
Example #32
0
 def __init__(self, descriptor, widths, default_width=None):
     """Store the font descriptor, the widths and the derived metrics.

     fontname comes from the resolved FontName entry ('unknown' when
     absent) and is converted to a plain name when it is a PSLiteral.
     Numeric entries default to 0 and bbox to (0, 0, 0, 0).
     default_width falls back to MissingWidth when the argument is falsy.
     hscale and vscale are both set to .001.
     """
     def metric(key):
         # Numeric descriptor entry, 0 when the key is absent.
         return num_value(descriptor.get(key, 0))

     self.descriptor = descriptor
     self.widths = widths
     fontname = resolve1(descriptor.get('FontName', 'unknown'))
     self.fontname = fontname
     if isinstance(fontname, PSLiteral):
         self.fontname = literal_name(fontname)
     self.flags = int_value(descriptor.get('Flags', 0))
     self.ascent = metric('Ascent')
     self.descent = metric('Descent')
     self.italic_angle = metric('ItalicAngle')
     self.default_width = default_width or metric('MissingWidth')
     self.leading = metric('Leading')
     self.bbox = list_value(descriptor.get('FontBBox', (0, 0, 0, 0)))
     self.hscale = self.vscale = .001
Example #33
0
 def __init__(self, descriptor, widths, default_width=None):
     """Store the font descriptor, the widths and the derived metrics.

     fontname comes from the resolved FontName entry ("unknown" when
     absent) and is converted to a plain name when it is a PSLiteral.
     Numeric entries default to 0 and bbox to (0, 0, 0, 0).
     default_width falls back to MissingWidth when the argument is falsy.
     hscale and vscale are both set to 0.001.
     """
     def metric(key):
         # Numeric descriptor entry, 0 when the key is absent.
         return num_value(descriptor.get(key, 0))

     self.descriptor = descriptor
     self.widths = widths
     fontname = resolve1(descriptor.get("FontName", "unknown"))
     self.fontname = fontname
     if isinstance(fontname, PSLiteral):
         self.fontname = literal_name(fontname)
     self.flags = int_value(descriptor.get("Flags", 0))
     self.ascent = metric("Ascent")
     self.descent = metric("Descent")
     self.italic_angle = metric("ItalicAngle")
     self.default_width = default_width or metric("MissingWidth")
     self.leading = metric("Leading")
     self.bbox = list_value(descriptor.get("FontBBox", (0, 0, 0, 0)))
     self.hscale = self.vscale = 0.001
Example #34
0
 def __init__(self, descriptor, widths, default_width=None):
     """Store the font descriptor, the widths and the derived metrics.

     fontname comes from the resolved FontName entry ('unknown' when
     absent) and is converted to a plain name when it is a PSLiteral.
     Numeric entries default to 0 and bbox to (0, 0, 0, 0).
     default_width falls back to MissingWidth when the argument is falsy.
     hscale and vscale are both set to .001.
     """
     def metric(key):
         # Numeric descriptor entry, 0 when the key is absent.
         return num_value(descriptor.get(key, 0))

     self.descriptor = descriptor
     self.widths = widths
     fontname = resolve1(descriptor.get('FontName', 'unknown'))
     self.fontname = fontname
     if isinstance(fontname, PSLiteral):
         self.fontname = literal_name(fontname)
     self.flags = int_value(descriptor.get('Flags', 0))
     self.ascent = metric('Ascent')
     self.descent = metric('Descent')
     self.italic_angle = metric('ItalicAngle')
     self.default_width = default_width or metric('MissingWidth')
     self.leading = metric('Leading')
     self.bbox = list_value(descriptor.get('FontBBox', (0, 0, 0, 0)))
     self.hscale = self.vscale = .001
Example #35
0
 def _initialize_password(self, password=''):
     """Create the security handler for this document and adopt its state.

     The handler factory is chosen from self.security_handler_registry by
     the V entry of the encryption parameters.  The handler's decrypt
     method becomes self.decipher and its permission answers are copied
     to is_printable, is_modifiable and is_extractable.

     Raises PDFEncryptionError when the filter is not 'Standard' or no
     factory is registered for V.
     """
     (docid, param) = self.encryption
     filter_name = literal_name(param.get('Filter'))
     if filter_name != 'Standard':
         raise PDFEncryptionError('Unknown filter: param=%r' % param)
     version = int_value(param.get('V', 0))
     make_handler = self.security_handler_registry.get(version)
     if make_handler is None:
         raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
     security = make_handler(docid, param, password)
     self.decipher = security.decrypt
     self.is_printable = security.is_printable()
     self.is_modifiable = security.is_modifiable()
     self.is_extractable = security.is_extractable()
     # Streams need to be read with their exact length from here on.
     self._parser.fallback = False
Example #36
0
 def do_keyword(self, pos, token):
     if token is self.KEYWORD_BI:
         # inline image within a content stream
         self.start_type(pos, 'inline')
     elif token is self.KEYWORD_ID:
         try:
             (_, objs) = self.end_type('inline')
             if len(objs) % 2 != 0:
                 raise PSTypeError('Invalid dictionary construct: %r' % objs)
             d = dict((literal_name(k), v) for (k, v) in choplist(2, objs))
             (pos, data) = self.get_inline_data(pos + len('ID '))
             obj = PDFStream(d, data)
             self.push((pos, obj))
             self.push((pos, self.KEYWORD_EI))
         except PSTypeError, e:
             handle_error(type(e), str(e))
Example #37
0
 def __init__(self, rsrc, spec):
     """Initialize the font from its ``spec`` dictionary.

     A missing BaseFont raises PDFFontError when STRICT is set and is
     otherwise replaced by 'unknown'.  Descriptor and widths come from
     FontMetricsDB when it knows the base font; otherwise they are built
     from FontDescriptor, FirstChar and Widths in ``spec``.
     """
     try:
         basefont = literal_name(spec['BaseFont'])
     except KeyError:
         if STRICT:
             raise PDFFontError('BaseFont is missing')
         basefont = 'unknown'
     self.basefont = basefont
     try:
         (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
     except KeyError:
         descriptor = dict_value(spec.get('FontDescriptor', {}))
         firstchar = int_value(spec.get('FirstChar', 0))
         # LastChar is still read, but its value is not used below.
         int_value(spec.get('LastChar', 255))
         width_list = list_value(spec.get('Widths', [0] * 256))
         widths = {}
         for (offset, width) in enumerate(width_list):
             widths[offset + firstchar] = width
     PDFSimpleFont.__init__(self, descriptor, widths, spec)
Example #38
0
 def __init__(self, spec):
     """Initialize the font from its ``spec`` dictionary.

     A missing BaseFont raises PDFFontError when STRICT is set and is
     otherwise replaced by "unknown".  Descriptor and widths come from
     FontMetricsDB when it knows the base font; otherwise they are built
     from FontDescriptor, FirstChar and Widths in ``spec``.
     """
     try:
         basefont = literal_name(spec["BaseFont"])
     except KeyError:
         if STRICT:
             raise PDFFontError("BaseFont is missing")
         basefont = "unknown"
     self.basefont = basefont
     try:
         (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
     except KeyError:
         descriptor = dict_value(spec.get("FontDescriptor", {}))
         firstchar = int_value(spec.get("FirstChar", 0))
         # LastChar is still read, but its value is not used below.
         int_value(spec.get("LastChar", 255))
         width_list = list_value(spec.get("Widths", [0] * 256))
         widths = {}
         for (offset, width) in enumerate(width_list):
             widths[offset + firstchar] = width
     PDFSimpleFont.__init__(self, descriptor, widths, spec)
Example #39
0
 def __init__(self, rsrc, spec):
     """Initialize the font from its ``spec`` dictionary.

     A missing BaseFont raises PDFFontError when STRICT is set and is
     otherwise replaced by 'unknown'.  Descriptor and widths come from
     FontMetricsDB when it knows the base font; otherwise they are built
     from FontDescriptor, FirstChar and Widths in ``spec``.
     """
     try:
         basefont = literal_name(spec['BaseFont'])
     except KeyError:
         if STRICT:
             raise PDFFontError('BaseFont is missing')
         basefont = 'unknown'
     self.basefont = basefont
     try:
         (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
     except KeyError:
         descriptor = dict_value(spec.get('FontDescriptor', {}))
         firstchar = int_value(spec.get('FirstChar', 0))
         # LastChar is still read, but its value is not used below.
         int_value(spec.get('LastChar', 255))
         width_list = list_value(spec.get('Widths', [0]*256))
         widths = {}
         for (offset, width) in enumerate(width_list):
             widths[offset + firstchar] = width
     PDFSimpleFont.__init__(self, descriptor, widths, spec)
Example #40
0
 def get_font(self, objid, spec):
     """Return the font object for ``spec``, using the cache when possible.

     A truthy ``objid`` already present in self._cached_fonts returns the
     cached font.  Otherwise a font is created according to the Subtype
     entry of ``spec`` (Type1 when it is absent and STRICT is off) and is
     cached under ``objid`` when ``objid`` is truthy and self.caching is
     set.  A Type0 font is built from its first descendant font, with
     Encoding and ToUnicode copied over from ``spec``.

     Raises PDFFontError when STRICT is set and the Type is not /Font,
     the Subtype is missing, or the Subtype is not recognized.
     """
     if objid and objid in self._cached_fonts:
         return self._cached_fonts[objid]
     if 2 <= self.debug:
         print >> sys.stderr, 'get_font: create: objid=%r, spec=%r' % (
             objid, spec)
     if STRICT and spec['Type'] is not LITERAL_FONT:
         raise PDFFontError('Type is not /Font')
     # Create a Font object.
     if 'Subtype' in spec:
         subtype = literal_name(spec['Subtype'])
     elif STRICT:
         raise PDFFontError('Font Subtype is not specified.')
     else:
         subtype = 'Type1'
     if subtype in ('Type1', 'MMType1'):
         # Type1 Font
         font = PDFType1Font(self, spec)
     elif subtype == 'TrueType':
         # TrueType Font
         font = PDFTrueTypeFont(self, spec)
     elif subtype == 'Type3':
         # Type3 Font
         font = PDFType3Font(self, spec)
     elif subtype in ('CIDFontType0', 'CIDFontType2'):
         # CID Font
         font = PDFCIDFont(self, spec)
     elif subtype == 'Type0':
         # Type0 Font
         descendants = list_value(spec['DescendantFonts'])
         assert descendants
         subspec = dict_value(descendants[0]).copy()
         for key in ('Encoding', 'ToUnicode'):
             if key in spec:
                 subspec[key] = resolve1(spec[key])
         font = self.get_font(None, subspec)
     else:
         if STRICT:
             raise PDFFontError('Invalid Font spec: %r' % spec)
         font = PDFType1Font(self, spec)  # this is so wrong!
     if objid and self.caching:
         self._cached_fonts[objid] = font
     return font
Example #41
0
 def get_font(self, objid, spec):
     """Look up or create the font object described by ``spec``.

     When ``objid`` is truthy and already a key of ``self._cached_fonts``
     the stored font is returned unchanged.  Otherwise a font is created
     according to ``spec["Subtype"]`` (defaulting to "Type1" if the key
     is missing) and, when ``objid`` is truthy and ``self.caching`` is
     set, remembered under ``objid``.

     Raises PDFFontError, only when STRICT is set, for a "Type" that is
     not LITERAL_FONT, a missing "Subtype", or an unhandled subtype.
     """
     cache = self._cached_fonts
     if objid and objid in cache:
         return cache[objid]

     if 2 <= self.debug:
         print >>sys.stderr, "get_font: create: objid=%r, spec=%r" % (objid, spec)
     if STRICT and spec["Type"] is not LITERAL_FONT:
         raise PDFFontError("Type is not /Font")

     # Determine the font subtype.
     if "Subtype" in spec:
         kind = literal_name(spec["Subtype"])
     elif STRICT:
         raise PDFFontError("Font Subtype is not specified.")
     else:
         kind = "Type1"

     # Create a Font object of the matching class.
     if kind in ("Type1", "MMType1"):
         font = PDFType1Font(self, spec)
     elif kind == "TrueType":
         font = PDFTrueTypeFont(self, spec)
     elif kind == "Type3":
         font = PDFType3Font(self, spec)
     elif kind in ("CIDFontType0", "CIDFontType2"):
         font = PDFCIDFont(self, spec)
     elif kind == "Type0":
         # Composite font: recurse on a copy of the first descendant,
         # carrying over the parent's Encoding / ToUnicode entries.
         dfonts = list_value(spec["DescendantFonts"])
         assert dfonts
         subspec = dict_value(dfonts[0]).copy()
         for inherited in ("Encoding", "ToUnicode"):
             if inherited in spec:
                 subspec[inherited] = resolve1(spec[inherited])
         font = self.get_font(None, subspec)
     elif STRICT:
         raise PDFFontError("Invalid Font spec: %r" % spec)
     else:
         font = PDFType1Font(self, spec)  # this is so wrong!

     if objid and self.caching:
         cache[objid] = font
     return font
 def get_font(self, objid, spec):
     """Fetch the font for ``objid`` from the cache or build it from ``spec``.

     The cache (``self._cached_fonts``) is consulted only for a truthy
     ``objid``.  On a miss, the 'Subtype' entry of ``spec`` selects the
     font class; a missing entry is treated as 'Type1'.  A newly built
     font is cached when ``objid`` is truthy and ``self.caching`` is set.

     In STRICT mode PDFFontError is raised for a 'Type' other than
     LITERAL_FONT, for a missing 'Subtype', and for an unknown subtype.
     """
     if objid and objid in self._cached_fonts:
         return self._cached_fonts[objid]

     if 2 <= self.debug:
         print >>sys.stderr, 'get_font: create: objid=%r, spec=%r' % (objid, spec)
     if STRICT and spec['Type'] is not LITERAL_FONT:
         raise PDFFontError('Type is not /Font')

     subtype = 'Type1'
     if 'Subtype' in spec:
         subtype = literal_name(spec['Subtype'])
     elif STRICT:
         raise PDFFontError('Font Subtype is not specified.')

     if subtype in ('Type1', 'MMType1'):
         # Type1 Font
         new_font = PDFType1Font(self, spec)
     elif subtype == 'TrueType':
         # TrueType Font
         new_font = PDFTrueTypeFont(self, spec)
     elif subtype == 'Type3':
         # Type3 Font
         new_font = PDFType3Font(self, spec)
     elif subtype in ('CIDFontType0', 'CIDFontType2'):
         # CID Font
         new_font = PDFCIDFont(self, spec)
     elif subtype == 'Type0':
         # Type0 Font: delegate to the first descendant font, with the
         # parent's 'Encoding' and 'ToUnicode' entries copied onto it.
         descendant_list = list_value(spec['DescendantFonts'])
         assert descendant_list
         descendant = dict_value(descendant_list[0]).copy()
         for entry in ('Encoding', 'ToUnicode'):
             if entry in spec:
                 descendant[entry] = resolve1(spec[entry])
         new_font = self.get_font(None, descendant)
     else:
         if STRICT:
             raise PDFFontError('Invalid Font spec: %r' % spec)
         new_font = PDFType1Font(self, spec) # this is so wrong!

     if objid and self.caching:
         self._cached_fonts[objid] = new_font
     return new_font
Example #43
0
 def do_keyword(self, pos, token):
     """Handle a keyword token, assembling inline images along the way.

     KEYWORD_BI opens an 'inline' group.  KEYWORD_ID closes it, turns the
     collected objects into a dictionary (keys passed through
     literal_name), fetches the raw data via ``get_inline_data`` and
     pushes a PDFStream followed by KEYWORD_EI.  A PSTypeError raised
     while doing so is re-raised only when STRICT is set.  Every other
     token is pushed unchanged.
     """
     if token is self.KEYWORD_BI:
         # inline image within a content stream
         self.start_type(pos, 'inline')
         return
     if token is not self.KEYWORD_ID:
         self.push((pos, token))
         return
     try:
         (_, items) = self.end_type('inline')
         if len(items) % 2:
             raise PSTypeError('Invalid dictionary construct: %r' % items)
         attrs = {}
         for (key, value) in choplist(2, items):
             attrs[literal_name(key)] = value
         # The data is looked for just past the 'ID ' keyword.
         (end_pos, payload) = self.get_inline_data(pos + len('ID '))
         self.push((end_pos, PDFStream(attrs, payload)))
         self.push((end_pos, self.KEYWORD_EI))
     except PSTypeError:
         if STRICT:
             raise
Example #44
0
 def do_keyword(self, pos, token):
     """Process one keyword token; BI/ID delimit an inline image.

     * KEYWORD_BI: start collecting an "inline" group.
     * KEYWORD_ID: end the group, build a dictionary from its key/value
       pairs, read the data through ``get_inline_data`` and push the
       resulting PDFStream plus a KEYWORD_EI marker.  PSTypeError is
       suppressed unless STRICT is set.
     * anything else: push ``(pos, token)`` as is.
     """
     if token is self.KEYWORD_BI:
         # inline image within a content stream
         self.start_type(pos, "inline")
         return

     if token is self.KEYWORD_ID:
         try:
             (_, collected) = self.end_type("inline")
             if len(collected) % 2 != 0:
                 raise PSTypeError("Invalid dictionary construct: %r" % collected)
             image_dict = {}
             for (raw_key, raw_value) in choplist(2, collected):
                 image_dict[literal_name(raw_key)] = raw_value
             (pos, data) = self.get_inline_data(pos + len("ID "))
             stream = PDFStream(image_dict, data)
             self.push((pos, stream))
             self.push((pos, self.KEYWORD_EI))
         except PSTypeError:
             if STRICT:
                 raise
         return

     self.push((pos, token))
Example #45
0
 def __init__(self, rsrcmgr, spec):
     """Initialise the font from the font dictionary ``spec``.

     ``rsrcmgr`` is accepted but not used in this method.  A missing
     'BaseFont' is reported through ``handle_error(PDFFontError, ...)``
     and, if that call returns, the name falls back to 'unknown'.
     Metrics come from FontMetricsDB when available, otherwise from
     ``spec``.  When ``spec`` has no 'Encoding' but the descriptor has a
     'FontFile', the encoding is read from the font file header.
     """
     try:
         self.basefont = literal_name(spec['BaseFont'])
     except KeyError:
         handle_error(PDFFontError, 'BaseFont is missing')
         self.basefont = 'unknown'
     try:
         (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
     except KeyError:
         # Fall back to the metrics stored in the font dictionary.
         descriptor = dict_value(spec.get('FontDescriptor', {}))
         start_code = int_value(spec.get('FirstChar', 0))
         # 'LastChar' is still run through int_value; the result is unused.
         int_value(spec.get('LastChar', 255))
         raw_widths = list_value(spec.get('Widths', [0] * 256))
         widths = {}
         for (index, char_width) in enumerate(raw_widths):
             widths[index + start_code] = char_width
     PDFSimpleFont.__init__(self, descriptor, widths, spec)
     if 'Encoding' in spec or 'FontFile' not in descriptor:
         return
     # try to recover the missing encoding info from the font file.
     self.fontfile = stream_value(descriptor.get('FontFile'))
     header_length = int_value(self.fontfile['Length1'])
     # Only the first Length1 bytes of the stream are handed to the parser.
     header = self.fontfile.get_data()[:header_length]
     self.cid2unicode = Type1FontHeaderParser(StringIO(header)).get_encoding()
Example #46
0
 def __init__(self, rsrcmgr, spec):
     """Build the font from its font dictionary ``spec``.

     ``rsrcmgr`` is accepted but not used in this method.  If 'BaseFont'
     is missing, PDFFontError is raised under STRICT and the name
     'unknown' is used otherwise.  Descriptor and widths come from
     FontMetricsDB when it has the base font, else from ``spec``.  If
     ``spec`` lacks 'Encoding' and the descriptor has a 'FontFile', the
     encoding is recovered from the font file header.
     """
     try:
         self.basefont = literal_name(spec['BaseFont'])
     except KeyError:
         if STRICT:
             raise PDFFontError('BaseFont is missing')
         self.basefont = 'unknown'

     try:
         (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
     except KeyError:
         descriptor = dict_value(spec.get('FontDescriptor', {}))
         base_code = int_value(spec.get('FirstChar', 0))
         # 'LastChar' is read via int_value as well, though never used.
         int_value(spec.get('LastChar', 255))
         listed = list_value(spec.get('Widths', [0] * 256))
         # Map character code -> width, starting at FirstChar.
         widths = {}
         for (position, value) in enumerate(listed):
             widths[position + base_code] = value

     PDFSimpleFont.__init__(self, descriptor, widths, spec)

     encoding_missing = 'Encoding' not in spec and 'FontFile' in descriptor
     if encoding_missing:
         # try to recover the missing encoding info from the font file.
         self.fontfile = stream_value(descriptor.get('FontFile'))
         length1 = int_value(self.fontfile['Length1'])
         font_header = self.fontfile.get_data()[:length1]
         header_parser = Type1FontHeaderParser(StringIO(font_header))
         self.cid2unicode = header_parser.get_encoding()
Example #47
0
 def do_cs(self, name):
     """Set ``self.ncs`` to the ``self.csmap`` entry for ``name``.

     ``name`` is converted with literal_name before the lookup; a key
     that is not in the map makes the lookup fail (not handled here).
     """
     colorspaces = self.csmap
     self.ncs = colorspaces[literal_name(name)]
Example #48
0
 def do_CS(self, name):
     """Set ``self.scs`` to the ``self.csmap`` entry for ``name``.

     ``name`` is converted with literal_name before the lookup; a key
     that is not in the map makes the lookup fail (not handled here).
     """
     colorspaces = self.csmap
     self.scs = colorspaces[literal_name(name)]
    def do_keyword(self, pos, token):
        """Handle one keyword token of a CMap program.

        'begincmap' / 'endcmap' switch ``self._in_cmap`` on and off; while
        it is off every other keyword is ignored.  Inside a cmap:

        * 'def' pops two entries and stores them via ``self.cmap.set_attr``.
        * 'usecmap' pops a name and merges the named cmap from CMapDB.
        * 'begin...' operators clear the operand stack with ``popall``.
        * 'endcidrange', 'endcidchar', 'endbfrange' and 'endbfchar' pop all
          operands and feed them to ``self.cmap``.
        * 'endcodespacerange' and 'endnotdefrange' discard their operands.
        * any other keyword is pushed back as ``(pos, token)``.

        Operands of the wrong type are skipped silently.
        """
        name = token.name
        if name == 'begincmap':
            self._in_cmap = True
            self.popall()
            return
        elif name == 'endcmap':
            self._in_cmap = False
            return
        if not self._in_cmap:
            return
        # From here on we are inside a begincmap ... endcmap section.
        if name == 'def':
            try:
                ((_, k), (_, v)) = self.pop(2)
                self.cmap.set_attr(literal_name(k), v)
            except PSSyntaxError:
                pass
            return

        if name == 'usecmap':
            try:
                ((_, cmapname), ) = self.pop(1)
                self.cmap.use_cmap(CMapDB.get_cmap(literal_name(cmapname)))
            except PSSyntaxError:
                pass
            except CMapDB.CMapNotFound:
                # An unknown base cmap is ignored rather than reported.
                pass
            return

        # The codespace ranges themselves are not recorded.
        if name == 'begincodespacerange':
            self.popall()
            return
        if name == 'endcodespacerange':
            self.popall()
            return

        if name == 'begincidrange':
            self.popall()
            return
        if name == 'endcidrange':
            # Operands come in triples: (start code, end code, first cid).
            objs = [obj for (__, obj) in self.popall()]
            for (s, e, cid) in choplist(3, objs):
                if (not isinstance(s, str) or not isinstance(e, str)
                        or not isinstance(cid, int) or len(s) != len(e)):
                    continue
                # Only the last (up to) four bytes may differ between the
                # two codes; everything before them must be identical.
                sprefix = s[:-4]
                eprefix = e[:-4]
                if sprefix != eprefix:
                    continue
                svar = s[-4:]
                evar = e[-4:]
                # nunpack presumably decodes a big-endian byte string into
                # an integer -- TODO confirm against its definition.
                s1 = nunpack(svar)
                e1 = nunpack(evar)
                vlen = len(svar)
                #assert s1 <= e1
                for i in xrange(e1 - s1 + 1):
                    # Pack the counter as 4 big-endian bytes and keep the
                    # last vlen of them, so the code keeps its length.
                    x = sprefix + struct.pack('>L', s1 + i)[-vlen:]
                    self.cmap.add_code2cid(x, cid + i)
            return

        if name == 'begincidchar':
            self.popall()
            return
        if name == 'endcidchar':
            # Operands come in pairs; a pair is used only if both are str.
            # NOTE(review): the element named ``cid`` must be a str and is
            # decoded with nunpack -- confirm the intended operand order.
            objs = [obj for (__, obj) in self.popall()]
            for (cid, code) in choplist(2, objs):
                if isinstance(code, str) and isinstance(cid, str):
                    self.cmap.add_code2cid(code, nunpack(cid))
            return

        if name == 'beginbfrange':
            self.popall()
            return
        if name == 'endbfrange':
            # Operands come in triples: (start, end, destination), where the
            # destination is either a list or a single string-like value.
            objs = [obj for (__, obj) in self.popall()]
            for (s, e, code) in choplist(3, objs):
                if (not isinstance(s, str) or not isinstance(e, str)
                        or len(s) != len(e)):
                    continue
                s1 = nunpack(s)
                e1 = nunpack(e)
                #assert s1 <= e1
                if isinstance(code, list):
                    # One explicit destination per source code; the list
                    # length is not checked against the range size.
                    for i in xrange(e1 - s1 + 1):
                        self.cmap.add_cid2unichr(s1 + i, code[i])
                else:
                    # Single destination: its last (up to) four bytes are
                    # counted upwards, as in the cidrange case.
                    var = code[-4:]
                    base = nunpack(var)
                    prefix = code[:-4]
                    vlen = len(var)
                    for i in xrange(e1 - s1 + 1):
                        x = prefix + struct.pack('>L', base + i)[-vlen:]
                        self.cmap.add_cid2unichr(s1 + i, x)
            return

        if name == 'beginbfchar':
            self.popall()
            return
        if name == 'endbfchar':
            # Operands come in pairs; a pair is used only if both are str.
            objs = [obj for (__, obj) in self.popall()]
            for (cid, code) in choplist(2, objs):
                if isinstance(cid, str) and isinstance(code, str):
                    self.cmap.add_cid2unichr(nunpack(cid), code)
            return

        # notdef ranges are discarded.
        if name == 'beginnotdefrange':
            self.popall()
            return
        if name == 'endnotdefrange':
            self.popall()
            return

        # Not an operator handled above: put it back on the stack.
        self.push((pos, token))
        return
Example #50
0
    def __init__(self, rsrcmgr, spec):
        """Initialise a CID-keyed font from its font dictionary ``spec``.

        ``rsrcmgr`` is accepted but not used in this method.  The method
        sets ``basefont``, ``cidsysteminfo``, ``cidcoding``, ``cmap``,
        ``unicode_map``, ``vertical``, ``disps`` and ``default_disp`` (plus
        ``fontfile`` when the descriptor has a 'FontFile2' entry) and then
        calls ``PDFFont.__init__``.

        Under STRICT, PDFFontError is raised when 'BaseFont', 'Encoding' or
        'FontDescriptor' is missing or the named cmap cannot be found;
        otherwise fallbacks ('unknown', an empty CMap, an empty
        descriptor) are used instead.
        """
        try:
            self.basefont = literal_name(spec['BaseFont'])
        except KeyError:
            if STRICT:
                raise PDFFontError('BaseFont is missing')
            self.basefont = 'unknown'

        # Character collection, written as '<Registry>-<Ordering>'.
        self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
        registry = self.cidsysteminfo.get('Registry', 'unknown')
        ordering = self.cidsysteminfo.get('Ordering', 'unknown')
        self.cidcoding = '%s-%s' % (registry, ordering)

        try:
            cmap_name = literal_name(spec['Encoding'])
        except KeyError:
            if STRICT:
                raise PDFFontError('Encoding is unspecified')
            cmap_name = 'unknown'
        try:
            self.cmap = CMapDB.get_cmap(cmap_name)
        except CMapDB.CMapNotFound as e:
            if STRICT:
                raise PDFFontError(e)
            self.cmap = CMap()

        try:
            descriptor = dict_value(spec['FontDescriptor'])
        except KeyError:
            if STRICT:
                raise PDFFontError('FontDescriptor is missing')
            descriptor = {}

        # An embedded font program under 'FontFile2' is opened as TrueType.
        ttf = None
        if 'FontFile2' in descriptor:
            self.fontfile = stream_value(descriptor.get('FontFile2'))
            font_data = StringIO(self.fontfile.get_data())
            ttf = TrueTypeFont(self.basefont, font_data)

        # Unicode mapping: an explicit ToUnicode stream is tried first, then
        # the embedded TrueType font (Adobe-Identity only), then CMapDB.
        self.unicode_map = None
        if 'ToUnicode' in spec:
            to_unicode = stream_value(spec['ToUnicode'])
            self.unicode_map = FileUnicodeMap()
            CMapParser(self.unicode_map, StringIO(to_unicode.get_data())).run()
        elif self.cidcoding == 'Adobe-Identity':
            if ttf:
                try:
                    self.unicode_map = ttf.create_unicode_map()
                except TrueTypeFont.CMapNotFound:
                    pass
        else:
            try:
                self.unicode_map = CMapDB.get_unicode_map(
                    self.cidcoding, self.cmap.is_vertical())
            except CMapDB.CMapNotFound:
                pass

        self.vertical = self.cmap.is_vertical()
        if self.vertical:
            # writing mode: vertical
            metrics = get_widths2(list_value(spec.get('W2', [])))
            displacements = {}
            for (cid, (_, (vx, vy))) in metrics.iteritems():
                displacements[cid] = (vx, vy)
            self.disps = displacements
            (default_vy, default_w) = spec.get('DW2', [880, -1000])
            self.default_disp = (None, default_vy)
            widths = {}
            for (cid, (glyph_width, _)) in metrics.iteritems():
                widths[cid] = glyph_width
            default_width = default_w
        else:
            # writing mode: horizontal
            self.disps = {}
            self.default_disp = 0
            widths = get_widths(list_value(spec.get('W', [])))
            default_width = spec.get('DW', 1000)
        PDFFont.__init__(self, descriptor, widths, default_width=default_width)