Ejemplo n.º 1
0
 def get_colorspace(spec):
     """Build the colour space object described by ``spec``.

     ``spec`` is either a bare colour space name or a list whose first
     element is the name, optionally followed by parameters.

     * ``ICCBased`` with a parameter: the component count is read from the
       ``N`` entry of the stream given as the second list element.
     * ``DeviceN`` with a parameter: the component count is the length of
       the list given as the second list element.
     * Anything else is looked up in ``PREDEFINED_COLORSPACE``; an unknown
       name propagates whatever that lookup raises.
     """
     is_array = isinstance(spec, list)
     name = literal_name(spec[0] if is_array else spec)
     # Only the list form with at least one operand can carry parameters.
     if is_array and len(spec) >= 2:
         if name == 'ICCBased':
             return PDFColorSpace(name, stream_value(spec[1])['N'])
         if name == 'DeviceN':
             return PDFColorSpace(name, len(list_value(spec[1])))
     return PREDEFINED_COLORSPACE[name]
Ejemplo n.º 2
0
 def do_Do(self, xobjid):
     """Handle the ``Do`` operator: render the XObject named ``xobjid``.

     The name is looked up in ``self.xobjmap``.  A form XObject (subtype
     ``LITERAL_FORM`` with a ``BBox`` entry) is rendered by a duplicate of
     this interpreter inside a figure on ``self.device``.  An image XObject
     (subtype ``LITERAL_IMAGE`` with ``Width`` and ``Height``) is passed to
     ``self.device.render_image``.  Any other XObject is ignored.

     Raises PDFInterpreterError for an undefined id when STRICT is set;
     otherwise an undefined id is silently skipped.
     """
     xobjid = literal_name(xobjid)
     try:
         xobj = stream_value(self.xobjmap[xobjid])
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
     if self.debug >= 1:
         print('Processing xobj: %r' % xobj)
     subtype = xobj.get('Subtype')

     if subtype is LITERAL_FORM and 'BBox' in xobj:
         child = self.dup()
         bbox = list_value(xobj['BBox'])
         matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
         # According to PDF reference 1.7 section 4.9.1, XObjects in
         # earlier PDFs (prior to v1.2) use the page's Resources entry
         # instead of having their own Resources entry.
         own_resources = dict_value(xobj.get('Resources'))
         resources = own_resources or self.resources.copy()
         self.device.begin_figure(xobjid, bbox, matrix)
         child.render_contents(resources, [xobj],
                               ctm=mult_matrix(matrix, self.ctm))
         self.device.end_figure(xobjid)
         return

     if subtype is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj:
         # Images are drawn in a unit-square figure with the identity matrix.
         self.device.begin_figure(xobjid, (0,0,1,1), MATRIX_IDENTITY)
         self.device.render_image(xobjid, xobj)
         self.device.end_figure(xobjid)

     # Any other XObject type is unsupported and falls through untouched.
     return
Ejemplo n.º 3
0
 def do_Tf(self, fontid, fontsize):
     """Handle the ``Tf`` operator: select the text font and font size.

     ``fontid`` is looked up in ``self.fontmap``; on success the font and
     ``fontsize`` are stored on ``self.textstate``.

     Raises:
         PDFInterpreterError: ``fontid`` is not in ``self.fontmap`` and
             STRICT is set.  When STRICT is not set an unknown font id is
             ignored and the text state is left unchanged.
     """
     try:
         self.textstate.font = self.fontmap[literal_name(fontid)]
     except KeyError:
         # No bare re-raise here: it would make the STRICT handling below
         # unreachable and leak a raw KeyError even in lenient mode.
         if STRICT:
             raise PDFInterpreterError('Undefined Font id: %r' % fontid)
         return
     self.textstate.fontsize = fontsize
     return
Ejemplo n.º 4
0
 def initialize(self, password=''):
     """Authenticate ``password`` and set up decryption for this document.

     When the document is not encrypted (``self.encryption`` is falsy) the
     three permission flags are set to True and nothing else happens.
     Otherwise the encryption dictionary is validated (``Standard`` filter,
     ``V`` of 1 or 2, revision ``R`` of 2 or 3), the permission flags are
     derived from bits 4, 8 and 16 of ``P``, the key is computed and checked
     against ``U``, and on success ``self.decrypt_key`` and
     ``self.decipher`` are set.

     Raises:
         PDFEncryptionError: unknown filter, algorithm or revision.
         PDFNotImplementedError: revision 4 encryption.
         PDFPasswordIncorrect: the password does not authenticate.
     """
     if not self.encryption:
         self.is_printable = self.is_modifiable = self.is_extractable = True
         return
     (docid, param) = self.encryption
     if literal_name(param.get('Filter')) != 'Standard':
         raise PDFEncryptionError('Unknown filter: param=%r' % param)
     V = int_value(param.get('V', 0))
     if V not in (1, 2):
         raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
     length = int_value(param.get('Length', 40)) # Key length (bits)
     O = str_value(param['O'])
     R = int_value(param['R']) # Revision
     if 5 <= R:
         raise PDFEncryptionError('Unknown revision: %r' % R)
     U = str_value(param['U'])
     P = int_value(param['P'])
     self.is_printable = bool(P & 4)
     self.is_modifiable = bool(P & 8)
     self.is_extractable = bool(P & 16)
     # Key length in bytes.  Floor division keeps this an int so it can be
     # used as a slice bound; true division yields a float on Python 3.
     keylen = length // 8
     # Algorithm 3.2
     password = (password+self.PASSWORD_PADDING)[:32] # 1
     hasher = md5.md5(password) # 2
     hasher.update(O) # 3
     hasher.update(struct.pack('<l', P)) # 4
     hasher.update(docid[0]) # 5
     if 4 <= R:
         # 6
         raise PDFNotImplementedError('Revision 4 encryption is currently unsupported')
     if 3 <= R:
         # 8
         for _ in range(50):
             hasher = md5.md5(hasher.digest()[:keylen])
     key = hasher.digest()[:keylen]
     if R == 2:
         # Algorithm 3.4
         u1 = Arcfour(key).process(self.PASSWORD_PADDING)
         is_authenticated = (u1 == U)
     elif R == 3:
         # Algorithm 3.5
         hasher = md5.md5(self.PASSWORD_PADDING) # 2
         hasher.update(docid[0]) # 3
         x = Arcfour(key).process(hasher.digest()[:16]) # 4
         for i in range(1, 20):
             k = ''.join( chr(ord(c) ^ i) for c in key )
             x = Arcfour(k).process(x)
         u1 = x+x # 32bytes total
         # Only the first 16 bytes are compared for this revision.
         is_authenticated = (u1[:16] == U[:16])
     else:
         # No user-key algorithm is implemented for revisions below 2;
         # fail explicitly instead of hitting an unbound local.
         raise PDFEncryptionError('Unknown revision: %r' % R)
     if not is_authenticated:
         raise PDFPasswordIncorrect
     self.decrypt_key = key
     self.decipher = self.decrypt_rc4  # XXX may be AES
     return
Ejemplo n.º 5
0
 def __init__(self, descriptor, widths, spec):
     """Set up the character-code-to-Unicode tables, then the base font.

     The font encoding comes from ``spec["Encoding"]`` when present and
     otherwise defaults to the standard encoding.  It is either an
     encoding name or a dictionary with ``BaseEncoding`` and
     ``Differences`` entries.  An optional ``ToUnicode`` stream is parsed
     into ``self.unicode_map``; without one that attribute is None.
     """
     if "Encoding" in spec:
         encoding = resolve1(spec["Encoding"])
     else:
         encoding = LITERAL_STANDARD_ENCODING

     if not isinstance(encoding, dict):
         # Plain name of a built-in encoding.
         self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding))
     else:
         # Dictionary form: a base encoding plus a list of differences.
         base = encoding.get("BaseEncoding", LITERAL_STANDARD_ENCODING)
         differences = list_value(encoding.get("Differences"))
         self.cid2unicode = EncodingDB.get_encoding(literal_name(base),
                                                    differences)

     self.unicode_map = None
     if "ToUnicode" in spec:
         tounicode = stream_value(spec["ToUnicode"])
         self.unicode_map = FileUnicodeMap()
         source = StringIO(tounicode.get_data())
         CMapParser(self.unicode_map, source).run()

     PDFFont.__init__(self, descriptor, widths)
     return
Ejemplo n.º 6
0
 def __init__(self, descriptor, widths, default_width=None):
     """Store the font descriptor, the width table and derived metrics.

     Numeric metrics are read from ``descriptor`` and default to 0 when
     the entry is absent.  ``default_width`` falls back to the
     descriptor's ``MissingWidth`` when it is not given (or is falsy).
     """
     def metric(key):
         # Numeric descriptor entry, 0 when missing.
         return num_value(descriptor.get(key, 0))

     self.descriptor = descriptor
     self.widths = widths

     self.fontname = resolve1(descriptor.get("FontName", "unknown"))
     if isinstance(self.fontname, PSLiteral):
         # Reduce a literal object to its plain name.
         self.fontname = literal_name(self.fontname)

     self.flags = int_value(descriptor.get("Flags", 0))
     self.ascent = metric("Ascent")
     self.descent = metric("Descent")
     self.italic_angle = metric("ItalicAngle")
     self.default_width = default_width or metric("MissingWidth")
     self.leading = metric("Leading")
     self.bbox = list_value(descriptor.get("FontBBox", (0, 0, 0, 0)))
     self.hscale = 0.001
     self.vscale = 0.001
     return
Ejemplo n.º 7
0
 def get_font(self, objid, spec):
     """Return the font object for ``spec``, creating it when not cached.

     A truthy ``objid`` already present in ``self._cached_fonts`` returns
     the cached font.  Otherwise the font class is chosen from the
     ``Subtype`` entry of ``spec``.  A ``Type0`` font is built from its
     first descendant font, with ``Encoding`` and ``ToUnicode`` copied over
     from ``spec``.  The new font is cached when ``objid`` is truthy and
     ``self.caching`` is set.

     With STRICT set, PDFFontError is raised for a wrong ``Type``, a
     missing ``Subtype`` or an unrecognised ``Subtype``; otherwise the
     font falls back to Type1 handling.
     """
     if objid and objid in self._cached_fonts:
         return self._cached_fonts[objid]

     if self.debug >= 2:
         print('get_font: create: objid=%r, spec=%r' % (objid, spec))
     if STRICT and spec['Type'] is not LITERAL_FONT:
         raise PDFFontError('Type is not /Font')

     # Work out which kind of font object to create.
     if 'Subtype' in spec:
         subtype = literal_name(spec['Subtype'])
     elif STRICT:
         raise PDFFontError('Font Subtype is not specified.')
     else:
         subtype = 'Type1'

     if subtype == 'Type1' or subtype == 'MMType1':
         font = PDFType1Font(self, spec)
     elif subtype == 'TrueType':
         font = PDFTrueTypeFont(self, spec)
     elif subtype == 'Type3':
         font = PDFType3Font(self, spec)
     elif subtype == 'CIDFontType0' or subtype == 'CIDFontType2':
         font = PDFCIDFont(self, spec)
     elif subtype == 'Type0':
         # Composite font: build it from the first descendant font.
         descendants = list_value(spec['DescendantFonts'])
         assert descendants
         subspec = dict_value(descendants[0]).copy()
         for key in ('Encoding', 'ToUnicode'):
             if key in spec:
                 subspec[key] = resolve1(spec[key])
         font = self.get_font(None, subspec)
     elif STRICT:
         raise PDFFontError('Invalid Font spec: %r' % spec)
     else:
         font = PDFType1Font(self, spec) # this is so wrong!

     if objid and self.caching:
         self._cached_fonts[objid] = font
     return font
Ejemplo n.º 8
0
 def do_keyword(self, pos, token):
     """Handle a keyword token, assembling inline images on the way.

     ``BI`` opens an ``inline`` context.  ``ID`` closes it, turns the
     collected key/value objects into a dictionary, reads the inline data
     and pushes a PDFStream followed by the ``EI`` keyword.  Every other
     keyword is pushed unchanged.  A PSTypeError raised while handling
     ``ID`` is re-raised only when STRICT is set.
     """
     if token is self.KEYWORD_BI:
         # inline image within a content stream
         self.start_type(pos, 'inline')
         return
     if token is not self.KEYWORD_ID:
         self.push((pos, token))
         return
     try:
         (_, objs) = self.end_type('inline')
         if len(objs) % 2:
             raise PSTypeError('Invalid dictionary construct: %r' % objs)
         attrs = {}
         for (k, v) in choplist(2, objs):
             attrs[literal_name(k)] = v
         # Image data starts right after the 'ID ' marker.
         (pos, data) = self.get_inline_data(pos+len('ID '))
         stream = PDFStream(attrs, data)
         self.push((pos, stream))
         self.push((pos, self.KEYWORD_EI))
     except PSTypeError:
         if STRICT:
             raise
     return
Ejemplo n.º 9
0
 def __init__(self, rsrcmgr, spec):
     """Initialise a Type 1 font from its font dictionary ``spec``.

     Metrics come from ``FontMetricsDB`` when it knows the base font name.
     Otherwise they are built from ``FontDescriptor``, ``FirstChar`` and
     ``Widths`` in ``spec``.  When ``spec`` has no ``Encoding`` but the
     descriptor has a ``FontFile``, the encoding is read from the font
     program header.

     Raises PDFFontError when ``BaseFont`` is missing and STRICT is set.
     """
     try:
         self.basefont = literal_name(spec["BaseFont"])
     except KeyError:
         if STRICT:
             raise PDFFontError("BaseFont is missing")
         self.basefont = "unknown"

     try:
         (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
     except KeyError:
         # No built-in metrics for this font: use the dictionary entries.
         descriptor = dict_value(spec.get("FontDescriptor", {}))
         firstchar = int_value(spec.get("FirstChar", 0))
         # The LastChar value is not used below; the call is kept so the
         # entry still goes through int_value.
         _ = int_value(spec.get("LastChar", 255))
         width_list = list_value(spec.get("Widths", [0] * 256))
         widths = {}
         for (i, w) in enumerate(width_list):
             widths[i + firstchar] = w

     PDFSimpleFont.__init__(self, descriptor, widths, spec)

     if "Encoding" in spec or "FontFile" not in descriptor:
         return
     # try to recover the missing encoding info from the font file.
     self.fontfile = stream_value(descriptor.get("FontFile"))
     length1 = int_value(self.fontfile["Length1"])
     header = self.fontfile.get_data()[:length1]
     self.cid2unicode = Type1FontHeaderParser(StringIO(header)).get_encoding()
     return
Ejemplo n.º 10
0
 def do_cs(self, name):
     """Handle the ``cs`` operator: select a colour space by name.

     Looks ``name`` up in ``self.csmap`` and stores the result in
     ``self.ncs`` (presumably the non-stroking colour space -- confirm
     against the class).  An unknown name propagates the lookup error.
     """
     colorspaces = self.csmap
     self.ncs = colorspaces[literal_name(name)]
     return
Ejemplo n.º 11
0
 def do_CS(self, name):
     """Handle the ``CS`` operator: select a colour space by name.

     Looks ``name`` up in ``self.csmap`` and stores the result in
     ``self.scs`` (presumably the stroking colour space -- confirm
     against the class).  An unknown name propagates the lookup error.
     """
     colorspaces = self.csmap
     self.scs = colorspaces[literal_name(name)]
     return
Ejemplo n.º 12
0
    def __init__(self, rsrcmgr, spec):
        """Initialise a CID-keyed font from its font dictionary ``spec``.

        Reads the base font name, the CID system info (combined into
        ``self.cidcoding`` as ``Registry-Ordering``), the CMap named by
        ``Encoding`` and the font descriptor.  A Unicode map is taken from
        ``ToUnicode`` when present, else from the embedded ``FontFile2``
        for ``Adobe-Identity``, else from ``CMapDB``.  It stays None when
        none of these yields one.  Widths and displacements are read from
        ``W2``/``DW2`` for a vertical CMap and from ``W``/``DW`` otherwise.

        With STRICT set, PDFFontError is raised for a missing ``BaseFont``,
        ``Encoding`` or ``FontDescriptor`` and for an unknown CMap.
        Without STRICT, placeholders are used instead.
        """
        try:
            self.basefont = literal_name(spec["BaseFont"])
        except KeyError:
            if STRICT:
                raise PDFFontError("BaseFont is missing")
            self.basefont = "unknown"

        sysinfo = dict_value(spec.get("CIDSystemInfo", {}))
        self.cidsysteminfo = sysinfo
        registry = sysinfo.get("Registry", "unknown")
        ordering = sysinfo.get("Ordering", "unknown")
        self.cidcoding = "%s-%s" % (registry, ordering)

        try:
            encoding_name = literal_name(spec["Encoding"])
        except KeyError:
            if STRICT:
                raise PDFFontError("Encoding is unspecified")
            encoding_name = "unknown"

        try:
            self.cmap = CMapDB.get_cmap(encoding_name)
        except CMapDB.CMapNotFound as e:
            if STRICT:
                raise PDFFontError(e)
            # Fall back to an empty CMap.
            self.cmap = CMap()

        try:
            descriptor = dict_value(spec["FontDescriptor"])
        except KeyError:
            if STRICT:
                raise PDFFontError("FontDescriptor is missing")
            descriptor = {}

        ttf = None
        if "FontFile2" in descriptor:
            self.fontfile = stream_value(descriptor.get("FontFile2"))
            font_data = StringIO(self.fontfile.get_data())
            ttf = TrueTypeFont(self.basefont, font_data)

        self.unicode_map = None
        if "ToUnicode" in spec:
            tounicode = stream_value(spec["ToUnicode"])
            self.unicode_map = FileUnicodeMap()
            CMapParser(self.unicode_map, StringIO(tounicode.get_data())).run()
        elif self.cidcoding == "Adobe-Identity":
            # Only the embedded font file can provide a map here.
            if ttf:
                try:
                    self.unicode_map = ttf.create_unicode_map()
                except TrueTypeFont.CMapNotFound:
                    pass
        else:
            try:
                self.unicode_map = CMapDB.get_unicode_map(
                    self.cidcoding, self.cmap.is_vertical())
            except CMapDB.CMapNotFound:
                pass

        self.vertical = self.cmap.is_vertical()
        if not self.vertical:
            # writing mode: horizontal
            self.disps = {}
            self.default_disp = 0
            widths = get_widths(list_value(spec.get("W", [])))
            default_width = spec.get("DW", 1000)
        else:
            # writing mode: vertical
            metrics = get_widths2(list_value(spec.get("W2", [])))
            disps = {}
            for (cid, (_, (vx, vy))) in metrics.items():
                disps[cid] = (vx, vy)
            self.disps = disps
            (default_vy, default_width) = spec.get("DW2", [880, -1000])
            self.default_disp = (None, default_vy)
            widths = {}
            for (cid, (width, _)) in metrics.items():
                widths[cid] = width
        PDFFont.__init__(self, descriptor, widths, default_width=default_width)
        return
Ejemplo n.º 13
0
 def do_keyword(self, pos, token):
     """Record an encoding entry when a ``put`` keyword is seen.

     On ``KEYWORD_PUT`` the top two stack objects are popped as
     ``(key, value)``.  When ``key`` is an int and ``value`` is a
     PSLiteral, ``(key, name-of-value)`` is added to the results.  Every
     other keyword is ignored.
     """
     if token is not self.KEYWORD_PUT:
         return
     (key_entry, value_entry) = self.pop(2)
     (_, key) = key_entry
     (_, value) = value_entry
     if not isinstance(key, int) or not isinstance(value, PSLiteral):
         return
     self.add_results((key, literal_name(value)))
     return
Ejemplo n.º 14
0
    def do_keyword(self, pos, token):
        """Dispatch one CMap keyword, identified by ``token.name``.

        ``begincmap`` / ``endcmap`` switch ``self._in_cmap`` on and off, and
        all other keywords are ignored while it is off.  Inside a CMap:

        * ``def`` sets an attribute on ``self.cmap``.
        * ``usecmap`` pulls in another CMap by name.
        * ``endcidrange`` / ``endcidchar`` add code-to-CID mappings.
        * ``endbfrange`` / ``endbfchar`` add CID-to-Unicode mappings.
        * the ``begin...`` markers and the codespace / notdef range
          keywords only clear the operand stack.
        * any other keyword is pushed back as ``(pos, token)``.

        Malformed entries inside a range or char list are skipped.
        """
        name = token.name
        if name == 'begincmap':
            self._in_cmap = True
            self.popall()
            return
        if name == 'endcmap':
            self._in_cmap = False
            return
        if not self._in_cmap:
            return

        # Keywords whose only effect is to discard the operand stack.
        discard_only = (
            'begincodespacerange', 'endcodespacerange',
            'begincidrange', 'begincidchar',
            'beginbfrange', 'beginbfchar',
            'beginnotdefrange', 'endnotdefrange',
        )

        if name == 'def':
            try:
                ((_, attr), (_, attr_value)) = self.pop(2)
                self.cmap.set_attr(literal_name(attr), attr_value)
            except PSSyntaxError:
                pass

        elif name == 'usecmap':
            try:
                ((_, cmapname),) = self.pop(1)
                self.cmap.use_cmap(CMapDB.get_cmap(literal_name(cmapname)))
            except (PSSyntaxError, CMapDB.CMapNotFound):
                pass

        elif name in discard_only:
            self.popall()

        elif name == 'endcidrange':
            objs = [obj for (_, obj) in self.popall()]
            for (s, e, cid) in choplist(3, objs):
                well_formed = (isinstance(s, str) and isinstance(e, str) and
                               isinstance(cid, int) and len(s) == len(e))
                if not well_formed:
                    continue
                # Only the last (up to) four bytes may vary across a range.
                sprefix = s[:-4]
                if sprefix != e[:-4]:
                    continue
                svar = s[-4:]
                first = nunpack(svar)
                last = nunpack(e[-4:])
                vlen = len(svar)
                #assert first <= last
                for i in range(last-first+1):
                    code = sprefix+struct.pack('>L',first+i)[-vlen:]
                    self.cmap.add_code2cid(code, cid+i)

        elif name == 'endcidchar':
            objs = [obj for (_, obj) in self.popall()]
            for (cid, code) in choplist(2, objs):
                if isinstance(code, str) and isinstance(cid, str):
                    self.cmap.add_code2cid(code, nunpack(cid))

        elif name == 'endbfrange':
            objs = [obj for (_, obj) in self.popall()]
            for (s, e, code) in choplist(3, objs):
                well_formed = (isinstance(s, str) and isinstance(e, str) and
                               len(s) == len(e))
                if not well_formed:
                    continue
                first = nunpack(s)
                last = nunpack(e)
                #assert first <= last
                count = last-first+1
                if isinstance(code, list):
                    # One explicit destination per source code.
                    for i in range(count):
                        self.cmap.add_cid2unichr(first+i, code[i])
                else:
                    # Single destination: count up its last (up to) four bytes.
                    var = code[-4:]
                    base = nunpack(var)
                    prefix = code[:-4]
                    vlen = len(var)
                    for i in range(count):
                        dest = prefix+struct.pack('>L',base+i)[-vlen:]
                        self.cmap.add_cid2unichr(first+i, dest)

        elif name == 'endbfchar':
            objs = [obj for (_, obj) in self.popall()]
            for (cid, code) in choplist(2, objs):
                if isinstance(cid, str) and isinstance(code, str):
                    self.cmap.add_cid2unichr(nunpack(cid), code)

        else:
            # Not a CMap operator handled here: push it back on the stack.
            self.push((pos, token))
        return