def get_colorspace(spec): if isinstance(spec, list): name = literal_name(spec[0]) else: name = literal_name(spec) if name == 'ICCBased' and isinstance(spec, list) and 2 <= len(spec): return PDFColorSpace(name, stream_value(spec[1])['N']) elif name == 'DeviceN' and isinstance(spec, list) and 2 <= len(spec): return PDFColorSpace(name, len(list_value(spec[1]))) else: return PREDEFINED_COLORSPACE.get(name)
def get_colorspace(spec): if isinstance(spec, list): name = literal_name(spec[0]) else: name = literal_name(spec) if name == "ICCBased" and isinstance(spec, list) and 2 <= len(spec): return ColorSpace(name, stream_value(spec[1]).dic["N"]) elif name == "DeviceN" and isinstance(spec, list) and 2 <= len(spec): return ColorSpace(name, len(list_value(spec[1]))) else: return PREDEFINED_COLORSPACE[name]
def get_colorspace(spec): if isinstance(spec, list): name = literal_name(spec[0]) else: name = literal_name(spec) if name == 'ICCBased' and isinstance(spec, list) and 2 <= len(spec): return PDFColorSpace(name, stream_value(spec[1])['N']) elif name == 'DeviceN' and isinstance(spec, list) and 2 <= len(spec): return PDFColorSpace(name, len(list_value(spec[1]))) else: return PREDEFINED_COLORSPACE[name]
def do_Do(self, xobjid): xobjid = literal_name(xobjid) try: xobj = stream_value(self.xobjmap[xobjid]) except KeyError: if STRICT: raise PDFInterpreterError("Undefined xobject id: %r" % xobjid) return if 1 <= self.debug: print >> stderr, "Processing xobj: %r" % xobj subtype = xobj.dic.get("Subtype") try: if subtype == LITERAL_FORM and "BBox" in xobj.dic: interpreter = PDFPageInterpreter(self.rsrc, self.device) (x0, y0, x1, y1) = xobj.dic["BBox"] ctm = mult_matrix(xobj.dic.get("Matrix", MATRIX_IDENTITY), self.ctm) (x0, y0) = apply_matrix(ctm, (x0, y0)) (x1, y1) = apply_matrix(ctm, (x1, y1)) bbox = (x0, y0, x1, y1) self.device.begin_figure(xobjid, bbox) interpreter.render_contents(xobj.dic.get("Resources"), [xobj], ctm=ctm) self.device.end_figure(xobjid) elif subtype == LITERAL_IMAGE and "Width" in xobj.dic and "Height" in xobj.dic: (x0, y0) = apply_matrix(self.ctm, (0, 0)) (x1, y1) = apply_matrix(self.ctm, (1, 1)) self.device.begin_figure(xobjid, (x0, y0, x1, y1)) (w, h) = (xobj.dic["Width"], xobj.dic["Height"]) self.device.render_image(xobj, (w, h), self.ctm) self.device.end_figure(xobjid) except TypeError: pass else: # unsupported xobject type. pass return
def do_Do(self, xobjid): xobjid = literal_name(xobjid) try: xobj = stream_value(self.xobjmap[xobjid]) except KeyError: if STRICT: raise PDFInterpreterError('Undefined xobject id: %r' % xobjid) return if 1 <= self.debug: print >>sys.stderr, 'Processing xobj: %r' % xobj subtype = xobj.get('Subtype') if subtype is LITERAL_FORM and 'BBox' in xobj: interpreter = self.dup() bbox = list_value(xobj['BBox']) matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY)) # According to PDF reference 1.7 section 4.9.1, XObjects in # earlier PDFs (prior to v1.2) use the page's Resources entry # instead of having their own Resources entry. resources = dict_value(xobj.get('Resources')) or self.resources.copy() self.device.begin_figure(xobjid, bbox, matrix) interpreter.render_contents(resources, [xobj], ctm=mult_matrix(matrix, self.ctm)) self.device.end_figure(xobjid) elif subtype is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj: self.device.begin_figure(xobjid, (0,0,1,1), MATRIX_IDENTITY) self.device.render_image(xobjid, xobj) self.device.end_figure(xobjid) else: # unsupported xobject type. pass return
def do_cs(self, name): try: self.ncs = self.csmap[literal_name(name)] except KeyError: if STRICT: raise PDFInterpreterError('Undefined ColorSpace: %r' % name) return
def do_keyword(self, pos, token): if token is self.KEYWORD_PUT: ((_,key),(_,value)) = self.pop(2) if (isinstance(key, int) and isinstance(value, PSLiteral)): self.add_results((key, literal_name(value))) return
def do_Do(self, xobjid): xobjid = literal_name(xobjid) try: xobj = stream_value(self.xobjmap[xobjid]) except KeyError: if STRICT: raise PDFInterpreterError('Undefined xobject id: %r' % xobjid) return if 1 <= self.debug: print >>stderr, 'Processing xobj: %r' % xobj subtype = xobj.get('Subtype') if subtype is LITERAL_FORM and 'BBox' in xobj: interpreter = self.dup() bbox = list_value(xobj['BBox']) matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY)) self.device.begin_figure(xobjid, bbox, matrix) interpreter.render_contents(dict_value(xobj.get('Resources')), [xobj], ctm=mult_matrix(matrix, self.ctm)) self.device.end_figure(xobjid) elif subtype is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj: self.device.begin_figure(xobjid, (0,0,1,1), MATRIX_IDENTITY) self.device.render_image(xobjid, xobj) self.device.end_figure(xobjid) else: # unsupported xobject type. pass return
def do_Do(self, xobjid): xobjid = literal_name(xobjid) try: xobj = stream_value(self.xobjmap[xobjid]) except KeyError: if STRICT: raise PDFInterpreterError('Undefined xobject id: %r' % xobjid) return if 1 <= self.debug: print >> stderr, 'Processing xobj: %r' % xobj subtype = xobj.dic.get('Subtype') if subtype is LITERAL_FORM and 'BBox' in xobj.dic: interpreter = self.dup() (x0, y0, x1, y1) = list_value(xobj.dic['BBox']) ctm = mult_matrix( list_value(xobj.dic.get('Matrix', MATRIX_IDENTITY)), self.ctm) (x0, y0) = apply_matrix(ctm, (x0, y0)) (x1, y1) = apply_matrix(ctm, (x1, y1)) bbox = (x0, y0, x1, y1) self.device.begin_figure(xobjid, bbox) interpreter.render_contents(dict_value(xobj.dic.get('Resources')), [xobj], ctm=ctm) self.device.end_figure(xobjid) elif subtype is LITERAL_IMAGE and 'Width' in xobj.dic and 'Height' in xobj.dic: (x0, y0) = apply_matrix(self.ctm, (0, 0)) (x1, y1) = apply_matrix(self.ctm, (1, 1)) self.device.begin_figure(xobjid, (x0, y0, x1, y1)) (w, h) = (xobj.dic['Width'], xobj.dic['Height']) self.device.render_image(xobj, (w, h), self.ctm) self.device.end_figure(xobjid) else: # unsupported xobject type. pass return
def do_Tf(self, fontid, fontsize): """selectfont""" try: self.textstate.font = self.fontmap[literal_name(fontid)] except KeyError: handle_error(PDFInterpreterError, 'Undefined Font id: %r' % fontid) self.textstate.font = PDFType1Font(self.rsrcmgr, {'BaseFont': 'Times-Roman'}) self.textstate.fontsize = fontsize
def decrypt(self, objid, genno, data, attrs=None, name=None): if not self.encrypt_metadata and attrs is not None: t = attrs.get('Type') if t is not None and literal_name(t) == 'Metadata': return data if name is None: name = self.strf return self.cfm[name](objid, genno, data)
def initialize(self, password=''): """Perform the initialization with a given password. This step is mandatory even if there's no password associated with the document. """ if not self.encryption: self.is_printable = self.is_modifiable = self.is_extractable = True return (docid, param) = self.encryption if literal_name(param.get('Filter')) != 'Standard': raise PDFEncryptionError('Unknown filter: param=%r' % param) V = int_value(param.get('V', 0)) if not (V == 1 or V == 2): raise PDFEncryptionError('Unknown algorithm: param=%r' % param) length = int_value(param.get('Length', 40)) # Key length (bits) O = str_value(param['O']) R = int_value(param['R']) # Revision if 5 <= R: raise PDFEncryptionError('Unknown revision: %r' % R) U = str_value(param['U']) P = int_value(param['P']) self.is_printable = bool(P & 4) self.is_modifiable = bool(P & 8) self.is_extractable = bool(P & 16) # Algorithm 3.2 password = (password + self.PASSWORD_PADDING)[:32] # 1 md5hash = md5.md5(password) # 2 md5hash.update(O) # 3 md5hash.update(struct.pack('<l', P)) # 4 md5hash.update(docid[0]) # 5 if 4 <= R: # 6 raise PDFNotImplementedError('Revision 4 encryption is currently unsupported') if 3 <= R: # 8 for _ in xrange(50): md5hash = md5.md5(md5hash.digest()[:length / 8]) key = md5hash.digest()[:length / 8] if R == 2: # Algorithm 3.4 u1 = Arcfour(key).process(self.PASSWORD_PADDING) elif R == 3: # Algorithm 3.5 md5hash = md5.md5(self.PASSWORD_PADDING) # 2 md5hash.update(docid[0]) # 3 x = Arcfour(key).process(md5hash.digest()[:16]) # 4 for i in xrange(1, 19 + 1): k = ''.join(chr(ord(c) ^ i) for c in key) x = Arcfour(k).process(x) u1 = x + x # 32bytes total if R == 2: is_authenticated = (u1 == U) else: is_authenticated = (u1[:16] == U[:16]) if not is_authenticated: raise PDFPasswordIncorrect self.decrypt_key = key self.decipher = self.decrypt_rc4 # XXX may be AES
def initialize(self, password=''): if not self.encryption: self.is_printable = self.is_modifiable = self.is_extractable = True self._initialized = True return (docid, param) = self.encryption if literal_name(param['Filter']) != 'Standard': raise PDFEncryptionError('Unknown filter: param=%r' % param) V = int_value(param.get('V', 0)) if not (V == 1 or V == 2): raise PDFEncryptionError('Unknown algorithm: param=%r' % param) length = int_value(param.get('Length', 40)) # Key length (bits) O = str_value(param['O']) R = int_value(param['R']) # Revision if 5 <= R: raise PDFEncryptionError('Unknown revision: %r' % R) U = str_value(param['U']) P = int_value(param['P']) self.is_printable = bool(P & 4) self.is_modifiable = bool(P & 8) self.is_extractable = bool(P & 16) # Algorithm 3.2 password = (password + self.PASSWORD_PADDING)[:32] # 1 hash = md5.md5(password) # 2 hash.update(O) # 3 hash.update(struct.pack('<l', P)) # 4 hash.update(docid[0]) # 5 if 4 <= R: # 6 raise PDFNotImplementedError( 'Revision 4 encryption is currently unsupported') if 3 <= R: # 8 for _ in xrange(50): hash = md5.md5(hash.digest()[:length / 8]) key = hash.digest()[:length / 8] if R == 2: # Algorithm 3.4 u1 = Arcfour(key).process(self.PASSWORD_PADDING) elif R == 3: # Algorithm 3.5 hash = md5.md5(self.PASSWORD_PADDING) # 2 hash.update(docid[0]) # 3 x = Arcfour(key).process(hash.digest()[:16]) # 4 for i in xrange(1, 19 + 1): k = ''.join(chr(ord(c) ^ i) for c in key) x = Arcfour(k).process(x) u1 = x + x # 32bytes total if R == 2: is_authenticated = (u1 == U) else: is_authenticated = (u1[:16] == U[:16]) if not is_authenticated: raise PDFPasswordIncorrect self.decrypt_key = key self.decipher = self.decrypt_rc4 # XXX may be AES self._initialized = True return
def do_Tf(self, fontid, fontsize): try: self.textstate.font = self.fontmap[literal_name(fontid)] except KeyError: if STRICT: raise PDFInterpreterError('Undefined Font id: %r' % fontid) return self.textstate.fontsize = fontsize return
def do_Tf(self, fontid, fontsize): try: self.textstate.font = self.fontmap[literal_name(fontid)] except KeyError: if STRICT: raise PDFInterpreterError("Undefined Font id: %r" % fontid) self.textstate.font = self.rsrcmgr.get_font(None, {}) self.textstate.fontsize = fontsize return
def do_Tf(self, fontid, fontsize): try: self.textstate.font = self.fontmap[literal_name(fontid)] except KeyError: if STRICT: raise PDFInterpreterError('Undefined Font id: %r' % fontid) self.textstate.font = self.rsrcmgr.get_font(None, {}) self.textstate.fontsize = fontsize return
def init_params(self): super(PDFStandardSecurityHandlerV4, self).init_params() self.length = 128 self.cf = dict_value(self.param.get('CF')) self.stmf = literal_name(self.param['StmF']) self.strf = literal_name(self.param['StrF']) self.encrypt_metadata = bool(self.param.get('EncryptMetadata', True)) if self.stmf != self.strf: raise PDFEncryptionError('Unsupported crypt filter: param=%r' % self.param) self.cfm = {} for k, v in self.cf.items(): f = self.get_cfm(literal_name(v['CFM'])) if f is None: raise PDFEncryptionError('Unknown crypt filter method: param=%r' % self.param) self.cfm[k] = f self.cfm['Identity'] = self.decrypt_identity if self.strf not in self.cfm: raise PDFEncryptionError('Undefined crypt filter: param=%r' % self.param)
def do_Tf(self, fontid, fontsize): try: self.textstate.font = self.fontmap[literal_name(fontid)] except KeyError: if STRICT: raise PDFInterpreterError('Undefined Font id: %r' % fontid) self.textstate.font = PDFType1Font( self.rsrcmgr, {'BaseFont':'Times-Roman'}) self.textstate.fontsize = fontsize return
def do_Tf(self, fontid, fontsize): try: self.textstate.font = self.fontmap[literal_name(fontid)] except KeyError: raise if STRICT: raise PDFInterpreterError('Undefined Font id: %r' % fontid) return self.textstate.fontsize = fontsize return
def __init__(self, rsrcmgr, spec): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: handle_error(PDFFontError, 'BaseFont is missing') self.basefont = 'unknown' self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {})) self.cidcoding = '%s-%s' % (self.cidsysteminfo.get('Registry', 'unknown'), self.cidsysteminfo.get('Ordering', 'unknown')) try: name = literal_name(spec['Encoding']) except KeyError: handle_error(PDFFontError, 'Encoding is unspecified') name = 'unknown' try: self.cmap = CMapDB.get_cmap(name) except CMapDB.CMapNotFound, e: handle_error(PDFFontError, str(e)) self.cmap = CMap()
def __init__(self, descriptor, widths, spec): # Font encoding is specified either by a name of # built-in encoding or a dictionary that describes # the differences. if 'Encoding' in spec: encoding = resolve1(spec['Encoding']) else: encoding = LITERAL_STANDARD_ENCODING if isinstance(encoding, dict): name = literal_name(encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING)) diff = list_value(encoding.get('Differences', None)) self.cid2unicode = EncodingDB.get_encoding(name, diff) else: self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding)) self.unicode_map = None if 'ToUnicode' in spec: strm = stream_value(spec['ToUnicode']) self.unicode_map = FileUnicodeMap() CMapParser(self.unicode_map, StringIO(strm.get_data())).run() PDFFont.__init__(self, descriptor, widths)
def __init__(self, descriptor, widths, default_width=None): self.descriptor = descriptor self.widths = widths self.fontname = descriptor.get('FontName', 'unknown') if isinstance(self.fontname, PSLiteral): self.fontname = literal_name(self.fontname) self.ascent = num_value(descriptor.get('Ascent', 0)) self.descent = num_value(descriptor.get('Descent', 0)) self.default_width = default_width or descriptor.get('MissingWidth', 0) self.leading = num_value(descriptor.get('Leading', 0)) self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0))) return
def __init__(self, descriptor, widths, spec): # Font encoding is specified either by a name of # built-in encoding or a dictionary that describes # the differences. if 'Encoding' in spec: encoding = resolve1(spec['Encoding']) else: encoding = LITERAL_STANDARD_ENCODING if isinstance(encoding, dict): name = literal_name(encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING)) diff = list_value(encoding.get('Differences', None)) self.cid2unicode = EncodingDB.get_encoding(name, diff) else: self.cid2unicode = EncodingDB.get_encoding(literal_name(encoding)) self.unicode_map = None if 'ToUnicode' in spec: strm = stream_value(spec['ToUnicode']) self.unicode_map = FileUnicodeMap() CMapParser(self.unicode_map, StringIO(strm.get_data())).run() PDFFont.__init__(self, descriptor, widths) return
def __init__(self, descriptor, widths, spec): # Font encoding is specified either by a name of # built-in encoding or a dictionary that describes # the differences. if "Encoding" in spec: encoding = resolve1(spec["Encoding"]) else: encoding = LITERAL_STANDARD_ENCODING if isinstance(encoding, dict): name = literal_name(encoding.get("BaseEncoding", LITERAL_STANDARD_ENCODING)) diff = list_value(encoding.get("Differences", None)) self.encoding = EncodingDB.get_encoding(name, diff) else: self.encoding = EncodingDB.get_encoding(literal_name(encoding)) self.ucs2_cmap = None if "ToUnicode" in spec: strm = stream_value(spec["ToUnicode"]) self.ucs2_cmap = CMap() CMapParser(self.ucs2_cmap, StringIO(strm.get_data())).run() PDFFont.__init__(self, descriptor, widths) return
def __init__(self, rsrc, spec): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: if STRICT: raise PDFFontError('BaseFont is missing') self.basefont = 'unknown' self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {})) self.cidcoding = '%s-%s' % (self.cidsysteminfo.get( 'Registry', 'unknown'), self.cidsysteminfo.get('Ordering', 'unknown')) try: name = literal_name(spec['Encoding']) except KeyError: if STRICT: raise PDFFontError('Encoding is unspecified') name = 'unknown' try: self.cmap = rsrc.get_cmap(name, strict=STRICT) except CMapDB.CMapNotFound, e: raise PDFFontError(e)
def __init__(self, descriptor, widths, default_width=None): self.descriptor = descriptor self.widths = widths self.fontname = descriptor.get('FontName', 'unknown') if isinstance(self.fontname, PSLiteral): self.fontname = literal_name(self.fontname) self.ascent = num_value(descriptor.get('Ascent', 0)) self.descent = num_value(descriptor.get('Descent', 0)) self.default_width = default_width or descriptor.get('MissingWidth', 0) self.leading = num_value(descriptor.get('Leading', 0)) self.bbox = list_value(descriptor.get('FontBBox', (0, 0, 0, 0))) return
def __init__(self, descriptor, widths, default_width=None): self.descriptor = descriptor self.widths = widths self.fontname = descriptor.get("FontName", "unknown") if isinstance(self.fontname, PSLiteral): self.fontname = literal_name(self.fontname) self.ascent = num_value(descriptor.get("Ascent", 0)) self.descent = num_value(descriptor.get("Descent", 0)) self.default_width = default_width or descriptor.get("MissingWidth", 0) self.leading = num_value(descriptor.get("Leading", 0)) self.bbox = list_value(descriptor.get("FontBBox", (0, 0, 0, 0))) return
def __init__(self, spec): try: self.basefont = literal_name(spec["BaseFont"]) except KeyError: if STRICT: raise PDFFontError("BaseFont is missing") self.basefont = "unknown" self.cidsysteminfo = dict_value(spec.get("CIDSystemInfo", {})) self.cidcoding = "%s-%s" % ( self.cidsysteminfo.get("Registry", "unknown"), self.cidsysteminfo.get("Ordering", "unknown"), ) try: name = literal_name(spec["Encoding"]) except KeyError: if STRICT: raise PDFFontError("Encoding not specified") name = "unknown" try: self.cmap = CMapDB.get_cmap(name, strict=STRICT) except CMapDB.CMapNotFound, e: raise PDFFontError(e)
def __init__(self, descriptor, widths, default_width=None): self.descriptor = descriptor self.widths = widths self.fontname = resolve1(descriptor.get('FontName', 'unknown')) if isinstance(self.fontname, PSLiteral): self.fontname = literal_name(self.fontname) self.flags = int_value(descriptor.get('Flags', 0)) self.ascent = num_value(descriptor.get('Ascent', 0)) self.descent = num_value(descriptor.get('Descent', 0)) self.italic_angle = num_value(descriptor.get('ItalicAngle', 0)) self.default_width = default_width or num_value(descriptor.get('MissingWidth', 0)) self.leading = num_value(descriptor.get('Leading', 0)) self.bbox = list_value(descriptor.get('FontBBox', (0, 0, 0, 0))) self.hscale = self.vscale = .001
def __init__(self, descriptor, widths, default_width=None): self.descriptor = descriptor self.widths = widths self.fontname = resolve1(descriptor.get("FontName", "unknown")) if isinstance(self.fontname, PSLiteral): self.fontname = literal_name(self.fontname) self.flags = int_value(descriptor.get("Flags", 0)) self.ascent = num_value(descriptor.get("Ascent", 0)) self.descent = num_value(descriptor.get("Descent", 0)) self.italic_angle = num_value(descriptor.get("ItalicAngle", 0)) self.default_width = default_width or num_value(descriptor.get("MissingWidth", 0)) self.leading = num_value(descriptor.get("Leading", 0)) self.bbox = list_value(descriptor.get("FontBBox", (0, 0, 0, 0))) self.hscale = self.vscale = 0.001 return
def __init__(self, descriptor, widths, default_width=None): self.descriptor = descriptor self.widths = widths self.fontname = resolve1(descriptor.get('FontName', 'unknown')) if isinstance(self.fontname, PSLiteral): self.fontname = literal_name(self.fontname) self.flags = int_value(descriptor.get('Flags', 0)) self.ascent = num_value(descriptor.get('Ascent', 0)) self.descent = num_value(descriptor.get('Descent', 0)) self.italic_angle = num_value(descriptor.get('ItalicAngle', 0)) self.default_width = default_width or num_value(descriptor.get('MissingWidth', 0)) self.leading = num_value(descriptor.get('Leading', 0)) self.bbox = list_value(descriptor.get('FontBBox', (0,0,0,0))) self.hscale = self.vscale = .001 return
def _initialize_password(self, password=''): (docid, param) = self.encryption if literal_name(param.get('Filter')) != 'Standard': raise PDFEncryptionError('Unknown filter: param=%r' % param) v = int_value(param.get('V', 0)) factory = self.security_handler_registry.get(v) if factory is None: raise PDFEncryptionError('Unknown algorithm: param=%r' % param) handler = factory(docid, param, password) self.decipher = handler.decrypt self.is_printable = handler.is_printable() self.is_modifiable = handler.is_modifiable() self.is_extractable = handler.is_extractable() self._parser.fallback = False # need to read streams with exact length return
def do_keyword(self, pos, token): if token is self.KEYWORD_BI: # inline image within a content stream self.start_type(pos, 'inline') elif token is self.KEYWORD_ID: try: (_, objs) = self.end_type('inline') if len(objs) % 2 != 0: raise PSTypeError('Invalid dictionary construct: %r' % objs) d = dict((literal_name(k), v) for (k, v) in choplist(2, objs)) (pos, data) = self.get_inline_data(pos + len('ID ')) obj = PDFStream(d, data) self.push((pos, obj)) self.push((pos, self.KEYWORD_EI)) except PSTypeError, e: handle_error(type(e), str(e))
def __init__(self, rsrc, spec): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: if STRICT: raise PDFFontError('BaseFont is missing') self.basefont = 'unknown' try: (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont) except KeyError: descriptor = dict_value(spec.get('FontDescriptor', {})) firstchar = int_value(spec.get('FirstChar', 0)) lastchar = int_value(spec.get('LastChar', 255)) widths = list_value(spec.get('Widths', [0] * 256)) widths = dict((i + firstchar, w) for (i, w) in enumerate(widths)) PDFSimpleFont.__init__(self, descriptor, widths, spec) return
def __init__(self, spec): try: self.basefont = literal_name(spec["BaseFont"]) except KeyError: if STRICT: raise PDFFontError("BaseFont is missing") self.basefont = "unknown" try: (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont) except KeyError: descriptor = dict_value(spec.get("FontDescriptor", {})) firstchar = int_value(spec.get("FirstChar", 0)) lastchar = int_value(spec.get("LastChar", 255)) widths = list_value(spec.get("Widths", [0] * 256)) widths = dict((i + firstchar, w) for (i, w) in enumerate(widths)) PDFSimpleFont.__init__(self, descriptor, widths, spec) return
def __init__(self, rsrc, spec): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: if STRICT: raise PDFFontError('BaseFont is missing') self.basefont = 'unknown' try: (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont) except KeyError: descriptor = dict_value(spec.get('FontDescriptor', {})) firstchar = int_value(spec.get('FirstChar', 0)) lastchar = int_value(spec.get('LastChar', 255)) widths = list_value(spec.get('Widths', [0]*256)) widths = dict( (i+firstchar,w) for (i,w) in enumerate(widths) ) PDFSimpleFont.__init__(self, descriptor, widths, spec) return
def get_font(self, objid, spec): if objid and objid in self._cached_fonts: font = self._cached_fonts[objid] else: if 2 <= self.debug: print >> sys.stderr, 'get_font: create: objid=%r, spec=%r' % ( objid, spec) if STRICT: if spec['Type'] is not LITERAL_FONT: raise PDFFontError('Type is not /Font') # Create a Font object. if 'Subtype' in spec: subtype = literal_name(spec['Subtype']) else: if STRICT: raise PDFFontError('Font Subtype is not specified.') subtype = 'Type1' if subtype in ('Type1', 'MMType1'): # Type1 Font font = PDFType1Font(self, spec) elif subtype == 'TrueType': # TrueType Font font = PDFTrueTypeFont(self, spec) elif subtype == 'Type3': # Type3 Font font = PDFType3Font(self, spec) elif subtype in ('CIDFontType0', 'CIDFontType2'): # CID Font font = PDFCIDFont(self, spec) elif subtype == 'Type0': # Type0 Font dfonts = list_value(spec['DescendantFonts']) assert dfonts subspec = dict_value(dfonts[0]).copy() for k in ('Encoding', 'ToUnicode'): if k in spec: subspec[k] = resolve1(spec[k]) font = self.get_font(None, subspec) else: if STRICT: raise PDFFontError('Invalid Font spec: %r' % spec) font = PDFType1Font(self, spec) # this is so wrong! if objid and self.caching: self._cached_fonts[objid] = font return font
def get_font(self, objid, spec): if objid and objid in self._cached_fonts: font = self._cached_fonts[objid] else: if 2 <= self.debug: print >>sys.stderr, "get_font: create: objid=%r, spec=%r" % (objid, spec) if STRICT: if spec["Type"] is not LITERAL_FONT: raise PDFFontError("Type is not /Font") # Create a Font object. if "Subtype" in spec: subtype = literal_name(spec["Subtype"]) else: if STRICT: raise PDFFontError("Font Subtype is not specified.") subtype = "Type1" if subtype in ("Type1", "MMType1"): # Type1 Font font = PDFType1Font(self, spec) elif subtype == "TrueType": # TrueType Font font = PDFTrueTypeFont(self, spec) elif subtype == "Type3": # Type3 Font font = PDFType3Font(self, spec) elif subtype in ("CIDFontType0", "CIDFontType2"): # CID Font font = PDFCIDFont(self, spec) elif subtype == "Type0": # Type0 Font dfonts = list_value(spec["DescendantFonts"]) assert dfonts subspec = dict_value(dfonts[0]).copy() for k in ("Encoding", "ToUnicode"): if k in spec: subspec[k] = resolve1(spec[k]) font = self.get_font(None, subspec) else: if STRICT: raise PDFFontError("Invalid Font spec: %r" % spec) font = PDFType1Font(self, spec) # this is so wrong! if objid and self.caching: self._cached_fonts[objid] = font return font
def get_font(self, objid, spec): if objid and objid in self._cached_fonts: font = self._cached_fonts[objid] else: if 2 <= self.debug: print >>sys.stderr, 'get_font: create: objid=%r, spec=%r' % (objid, spec) if STRICT: if spec['Type'] is not LITERAL_FONT: raise PDFFontError('Type is not /Font') # Create a Font object. if 'Subtype' in spec: subtype = literal_name(spec['Subtype']) else: if STRICT: raise PDFFontError('Font Subtype is not specified.') subtype = 'Type1' if subtype in ('Type1', 'MMType1'): # Type1 Font font = PDFType1Font(self, spec) elif subtype == 'TrueType': # TrueType Font font = PDFTrueTypeFont(self, spec) elif subtype == 'Type3': # Type3 Font font = PDFType3Font(self, spec) elif subtype in ('CIDFontType0', 'CIDFontType2'): # CID Font font = PDFCIDFont(self, spec) elif subtype == 'Type0': # Type0 Font dfonts = list_value(spec['DescendantFonts']) assert dfonts subspec = dict_value(dfonts[0]).copy() for k in ('Encoding', 'ToUnicode'): if k in spec: subspec[k] = resolve1(spec[k]) font = self.get_font(None, subspec) else: if STRICT: raise PDFFontError('Invalid Font spec: %r' % spec) font = PDFType1Font(self, spec) # this is so wrong! if objid and self.caching: self._cached_fonts[objid] = font return font
def do_keyword(self, pos, token): if token is self.KEYWORD_BI: # inline image within a content stream self.start_type(pos, 'inline') elif token is self.KEYWORD_ID: try: (_, objs) = self.end_type('inline') if len(objs) % 2 != 0: raise PSTypeError('Invalid dictionary construct: %r' % objs) d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs) ) (pos, data) = self.get_inline_data(pos+len('ID ')) obj = PDFStream(d, data) self.push((pos, obj)) self.push((pos, self.KEYWORD_EI)) except PSTypeError: if STRICT: raise else: self.push((pos, token)) return
def do_keyword(self, pos, token): if token is self.KEYWORD_BI: # inline image within a content stream self.start_type(pos, "inline") elif token is self.KEYWORD_ID: try: (_, objs) = self.end_type("inline") if len(objs) % 2 != 0: raise PSTypeError("Invalid dictionary construct: %r" % objs) d = dict((literal_name(k), v) for (k, v) in choplist(2, objs)) (pos, data) = self.get_inline_data(pos + len("ID ")) obj = PDFStream(d, data) self.push((pos, obj)) self.push((pos, self.KEYWORD_EI)) except PSTypeError: if STRICT: raise else: self.push((pos, token)) return
def __init__(self, rsrcmgr, spec): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: handle_error(PDFFontError, 'BaseFont is missing') self.basefont = 'unknown' try: (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont) except KeyError: descriptor = dict_value(spec.get('FontDescriptor', {})) firstchar = int_value(spec.get('FirstChar', 0)) lastchar = int_value(spec.get('LastChar', 255)) widths = list_value(spec.get('Widths', [0] * 256)) widths = dict((i+firstchar, w) for (i, w) in enumerate(widths)) PDFSimpleFont.__init__(self, descriptor, widths, spec) if 'Encoding' not in spec and 'FontFile' in descriptor: # try to recover the missing encoding info from the font file. self.fontfile = stream_value(descriptor.get('FontFile')) length1 = int_value(self.fontfile['Length1']) data = self.fontfile.get_data()[:length1] parser = Type1FontHeaderParser(StringIO(data)) self.cid2unicode = parser.get_encoding()
def __init__(self, rsrcmgr, spec): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: if STRICT: raise PDFFontError('BaseFont is missing') self.basefont = 'unknown' try: (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont) except KeyError: descriptor = dict_value(spec.get('FontDescriptor', {})) firstchar = int_value(spec.get('FirstChar', 0)) lastchar = int_value(spec.get('LastChar', 255)) widths = list_value(spec.get('Widths', [0]*256)) widths = dict( (i+firstchar,w) for (i,w) in enumerate(widths) ) PDFSimpleFont.__init__(self, descriptor, widths, spec) if 'Encoding' not in spec and 'FontFile' in descriptor: # try to recover the missing encoding info from the font file. self.fontfile = stream_value(descriptor.get('FontFile')) length1 = int_value(self.fontfile['Length1']) data = self.fontfile.get_data()[:length1] parser = Type1FontHeaderParser(StringIO(data)) self.cid2unicode = parser.get_encoding() return
def do_cs(self, name): self.ncs = self.csmap[literal_name(name)] return
def do_CS(self, name): self.scs = self.csmap[literal_name(name)] return
def do_keyword(self, pos, token): name = token.name if name == 'begincmap': self._in_cmap = True self.popall() return elif name == 'endcmap': self._in_cmap = False return if not self._in_cmap: return # if name == 'def': try: ((_, k), (_, v)) = self.pop(2) self.cmap.set_attr(literal_name(k), v) except PSSyntaxError: pass return if name == 'usecmap': try: ((_, cmapname), ) = self.pop(1) self.cmap.use_cmap(CMapDB.get_cmap(literal_name(cmapname))) except PSSyntaxError: pass except CMapDB.CMapNotFound: pass return if name == 'begincodespacerange': self.popall() return if name == 'endcodespacerange': self.popall() return if name == 'begincidrange': self.popall() return if name == 'endcidrange': objs = [obj for (__, obj) in self.popall()] for (s, e, cid) in choplist(3, objs): if (not isinstance(s, str) or not isinstance(e, str) or not isinstance(cid, int) or len(s) != len(e)): continue sprefix = s[:-4] eprefix = e[:-4] if sprefix != eprefix: continue svar = s[-4:] evar = e[-4:] s1 = nunpack(svar) e1 = nunpack(evar) vlen = len(svar) #assert s1 <= e1 for i in xrange(e1 - s1 + 1): x = sprefix + struct.pack('>L', s1 + i)[-vlen:] self.cmap.add_code2cid(x, cid + i) return if name == 'begincidchar': self.popall() return if name == 'endcidchar': objs = [obj for (__, obj) in self.popall()] for (cid, code) in choplist(2, objs): if isinstance(code, str) and isinstance(cid, str): self.cmap.add_code2cid(code, nunpack(cid)) return if name == 'beginbfrange': self.popall() return if name == 'endbfrange': objs = [obj for (__, obj) in self.popall()] for (s, e, code) in choplist(3, objs): if (not isinstance(s, str) or not isinstance(e, str) or len(s) != len(e)): continue s1 = nunpack(s) e1 = nunpack(e) #assert s1 <= e1 if isinstance(code, list): for i in xrange(e1 - s1 + 1): self.cmap.add_cid2unichr(s1 + i, code[i]) else: var = code[-4:] base = nunpack(var) prefix = code[:-4] vlen = len(var) for i in xrange(e1 - s1 + 1): x = prefix + struct.pack('>L', base + i)[-vlen:] self.cmap.add_cid2unichr(s1 + i, x) return if name == 'beginbfchar': self.popall() return if name == 'endbfchar': objs = [obj for (__, obj) in self.popall()] for (cid, code) in choplist(2, objs): if isinstance(cid, str) and isinstance(code, str): self.cmap.add_cid2unichr(nunpack(cid), code) return if name == 'beginnotdefrange': self.popall() return if name == 'endnotdefrange': self.popall() return self.push((pos, token)) return
def __init__(self, rsrcmgr, spec): try: self.basefont = literal_name(spec['BaseFont']) except KeyError: if STRICT: raise PDFFontError('BaseFont is missing') self.basefont = 'unknown' self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {})) self.cidcoding = '%s-%s' % (self.cidsysteminfo.get( 'Registry', 'unknown'), self.cidsysteminfo.get('Ordering', 'unknown')) try: name = literal_name(spec['Encoding']) except KeyError: if STRICT: raise PDFFontError('Encoding is unspecified') name = 'unknown' try: self.cmap = CMapDB.get_cmap(name) except CMapDB.CMapNotFound as e: if STRICT: raise PDFFontError(e) self.cmap = CMap() try: descriptor = dict_value(spec['FontDescriptor']) except KeyError: if STRICT: raise PDFFontError('FontDescriptor is missing') descriptor = {} ttf = None if 'FontFile2' in descriptor: self.fontfile = stream_value(descriptor.get('FontFile2')) ttf = TrueTypeFont(self.basefont, StringIO(self.fontfile.get_data())) self.unicode_map = None if 'ToUnicode' in spec: strm = stream_value(spec['ToUnicode']) self.unicode_map = FileUnicodeMap() CMapParser(self.unicode_map, StringIO(strm.get_data())).run() elif self.cidcoding == 'Adobe-Identity': if ttf: try: self.unicode_map = ttf.create_unicode_map() except TrueTypeFont.CMapNotFound: pass else: try: self.unicode_map = CMapDB.get_unicode_map( self.cidcoding, self.cmap.is_vertical()) except CMapDB.CMapNotFound as e: pass self.vertical = self.cmap.is_vertical() if self.vertical: # writing mode: vertical widths = get_widths2(list_value(spec.get('W2', []))) self.disps = dict( (cid, (vx, vy)) for (cid, (_, (vx, vy))) in widths.iteritems()) (vy, w) = spec.get('DW2', [880, -1000]) self.default_disp = (None, vy) widths = dict((cid, w) for (cid, (w, _)) in widths.iteritems()) default_width = w else: # writing mode: horizontal self.disps = {} self.default_disp = 0 widths = get_widths(list_value(spec.get('W', []))) default_width = spec.get('DW', 1000) PDFFont.__init__(self, descriptor, widths, default_width=default_width) return