# Exception types declared here; each one only names a more specific
# subclass of the base given in parentheses and adds no behaviour.
class PDFPasswordIncorrect(PDFEncryptionError): pass
class PDFTypeError(PDFException): pass
class PDFValueError(PDFException): pass
# NOTE(review): unlike its siblings this one derives from PSException rather
# than PDFException -- confirm that is intentional.
class PDFNotImplementedError(PSException): pass

# Some predefined literals and keywords.
# Each name is interned once in its table so the same object is reused
# wherever the constant is referenced.
LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm')
LITERAL_XREF = PSLiteralTable.intern('XRef')
LITERAL_PAGE = PSLiteralTable.intern('Page')
LITERAL_PAGES = PSLiteralTable.intern('Pages')
LITERAL_CATALOG = PSLiteralTable.intern('Catalog')
LITERAL_CRYPT = PSLiteralTable.intern('Crypt')
LITERAL_FLATE_DECODE = PSLiteralTable.intern('FlateDecode')
LITERAL_LZW_DECODE = PSLiteralTable.intern('LZWDecode')
KEYWORD_R = PSKeywordTable.intern('R')
KEYWORD_OBJ = PSKeywordTable.intern('obj')
KEYWORD_ENDOBJ = PSKeywordTable.intern('endobj')
KEYWORD_STREAM = PSKeywordTable.intern('stream')
KEYWORD_XREF = PSKeywordTable.intern('xref')
KEYWORD_TRAILER = PSKeywordTable.intern('trailer')
KEYWORD_STARTXREF = PSKeywordTable.intern('startxref')
# 32-character constant; presumably the fixed padding string used by the PDF
# standard security handler when deriving keys from passwords -- TODO confirm
# against the specification.
PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'


## PDFObjRef
##
class PDFObjRef:
    def __init__(self, doc, objid, _):
        if objid == 0:
def __repr__(self): return "<ColorSpace: %s, ncomponents=%d>" % (self.name, self.ncomponents) ## Constants ## LITERAL_PDF = PSLiteralTable.intern("PDF") LITERAL_TEXT = PSLiteralTable.intern("Text") LITERAL_FONT = PSLiteralTable.intern("Font") LITERAL_FORM = PSLiteralTable.intern("Form") LITERAL_IMAGE = PSLiteralTable.intern("Image") LITERAL_STANDARD_ENCODING = PSLiteralTable.intern("StandardEncoding") LITERAL_DEVICE_GRAY = PSLiteralTable.intern("DeviceGray") LITERAL_DEVICE_RGB = PSLiteralTable.intern("DeviceRGB") LITERAL_DEVICE_CMYK = PSLiteralTable.intern("DeviceCMYK") KEYWORD_BI = PSKeywordTable.intern("BI") KEYWORD_ID = PSKeywordTable.intern("ID") KEYWORD_EI = PSKeywordTable.intern("EI") MATRIX_IDENTITY = (1, 0, 0, 1, 0, 0) PREDEFINED_COLORSPACE = dict( (name, ColorSpace(name, n)) for (name, n) in { "CalRGB": 3, "CalGray": 1, "Lab": 3, "DeviceRGB": 3, "DeviceCMYK": 4, "DeviceGray": 1, "Separation": 1, "Indexed": 1,
class PDFContentParser(PSStackParser):
    """Stack parser that reads a list of streams as one continuous input.

    Whenever the stream currently being read is exhausted, the next entry of
    ``streams`` is opened transparently.  The parser also handles the
    ``BI`` / ``ID`` / ``EI`` keywords: the key/value pairs collected between
    ``BI`` and ``ID`` plus the raw data up to ``EI`` are turned into a
    ``PDFStream`` object.
    """

    # Keywords delimiting an inline image: BI <key/value pairs> ID <data> EI.
    KEYWORD_BI = PSKeywordTable.intern('BI')
    KEYWORD_ID = PSKeywordTable.intern('ID')
    KEYWORD_EI = PSKeywordTable.intern('EI')

    def __init__(self, streams):
        """Create a parser over *streams*.

        streams: sequence whose items ``stream_value`` can resolve to an
            object providing ``get_data()``.
        """
        self.streams = streams
        # Index of the next entry of ``self.streams`` to open.
        self.istream = 0
        # The base parser gets no file object; fillfp() opens one on demand.
        PSStackParser.__init__(self, None)

    def fillfp(self):
        """Ensure ``self.fp`` is set, opening the next stream if necessary.

        Raises:
            PSEOF: every stream has already been consumed.
        """
        if self.fp:
            return
        if len(self.streams) <= self.istream:
            raise PSEOF('Unexpected EOF, file truncated?')
        strm = stream_value(self.streams[self.istream])
        self.istream += 1
        self.fp = StringIO(strm.get_data())

    def seek(self, pos):
        """Seek to *pos*, making sure a stream is open first."""
        self.fillfp()
        PSStackParser.seek(self, pos)

    def fillbuf(self):
        """Refill ``self.buf`` once all buffered characters are consumed.

        Streams that yield no more data are dropped and the following one is
        tried, so PSEOF (raised by fillfp) propagates only when the last
        stream has run out.
        """
        if self.charpos < len(self.buf):
            return
        while True:
            self.fillfp()
            self.bufpos = self.fp.tell()
            self.buf = self.fp.read(self.BUFSIZ)
            if self.buf:
                break
            # Current stream is exhausted; let fillfp() open the next one.
            self.fp = None
        self.charpos = 0

    def get_inline_data(self, pos, target='EI'):
        """Read raw data from *pos* up to the terminating *target* keyword.

        The data ends at the first occurrence of *target* that is directly
        followed by a whitespace character.

        Args:
            pos: position at which the data starts.
            target: terminator keyword to look for.

        Returns:
            ``(pos, data)`` -- the start position unchanged and the data
            without the terminator, with CR/LF characters removed.

        Raises:
            PSEOF: input ran out before a terminator was found (raised by
                fillbuf()).
        """
        self.seek(pos)
        # Number of terminator characters matched so far.  The loop stops at
        # len(target) + 1, i.e. target plus one trailing whitespace.
        i = 0
        chunks = []
        while i <= len(target):
            self.fillbuf()
            if i:
                c = self.buf[self.charpos]
                chunks.append(c)
                self.charpos += 1
                if i >= len(target):
                    # Whole target seen; only whitespace completes the match.
                    # (Indexing target[i] here used to raise IndexError.)
                    matched = c.isspace()
                else:
                    matched = (c == target[i])
                if matched:
                    i += 1
                elif c == target[0]:
                    # The mismatching character may itself start a new
                    # terminator (e.g. "EEI "); do not skip over it.
                    i = 1
                else:
                    i = 0
            else:
                # Fast path: jump straight to the next candidate start.
                try:
                    j = self.buf.index(target[0], self.charpos)
                except ValueError:
                    chunks.append(self.buf[self.charpos:])
                    self.charpos = len(self.buf)
                else:
                    chunks.append(self.buf[self.charpos:j + 1])
                    self.charpos = j + 1
                    i = 1
        data = ''.join(chunks)
        # Strip the terminator and the whitespace character that followed it.
        data = data[:-(len(target) + 1)]
        # NOTE(review): this removes every CR/LF in the payload, not only one
        # next to the terminator; kept unchanged for compatibility -- confirm
        # it is intended for binary payloads.
        data = re.sub(r'(\x0d\x0a|[\x0d\x0a])', '', data)
        return (pos, data)

    def flush(self):
        """Hand everything currently on the stack over as results."""
        self.add_results(*self.popall())

    def do_keyword(self, pos, token):
        """Handle keyword *token* found at *pos*.

        ``BI`` opens an 'inline' context.  ``ID`` closes it, builds a dict
        from the collected key/value pairs, reads the raw data that follows
        and pushes a ``PDFStream`` followed by the ``EI`` keyword.  Any other
        keyword is pushed unchanged.

        Raises:
            PSTypeError: the inline construct is malformed and STRICT is
                set; otherwise the malformed construct is dropped.
        """
        if token is self.KEYWORD_BI:
            # inline image within a content stream
            self.start_type(pos, 'inline')
        elif token is self.KEYWORD_ID:
            try:
                (_, objs) = self.end_type('inline')
                if len(objs) % 2 != 0:
                    raise PSTypeError('Invalid dictionary construct: %r' % objs)
                d = dict((literal_name(k), v) for (k, v) in choplist(2, objs))
                # Data starts after the keyword and one following character.
                (pos, data) = self.get_inline_data(pos + len('ID '))
                obj = PDFStream(d, data)
                self.push((pos, obj))
                # get_inline_data() consumed the terminator, so push the EI
                # keyword explicitly.
                self.push((pos, self.KEYWORD_EI))
            except PSTypeError:
                if STRICT:
                    raise
        else:
            self.push((pos, token))