Exemple #1
0
class PDFPasswordIncorrect(PDFEncryptionError):
    """Encryption error subclass for an incorrect password."""


class PDFTypeError(PDFException):
    """PDFException subclass for type errors."""


class PDFValueError(PDFException):
    """PDFException subclass for value errors."""


# NOTE(review): unlike its siblings this derives from PSException rather than
# PDFException -- confirm that this is intentional.
class PDFNotImplementedError(PSException):
    """PSException subclass for features that are not implemented."""


# Predefined literals and keywords, created once at module load through
# PSLiteralTable.intern / PSKeywordTable.intern.
# NOTE(review): interning presumably yields one shared object per name so
# tokens can be compared by identity -- confirm in the table implementation.

# Name literals: 'ObjStm', 'XRef', 'Page', 'Pages', 'Catalog', 'Crypt' and the
# filter names 'FlateDecode' / 'LZWDecode'.
LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm')
LITERAL_XREF = PSLiteralTable.intern('XRef')
LITERAL_PAGE = PSLiteralTable.intern('Page')
LITERAL_PAGES = PSLiteralTable.intern('Pages')
LITERAL_CATALOG = PSLiteralTable.intern('Catalog')
LITERAL_CRYPT = PSLiteralTable.intern('Crypt')
LITERAL_FLATE_DECODE = PSLiteralTable.intern('FlateDecode')
LITERAL_LZW_DECODE = PSLiteralTable.intern('LZWDecode')
# Keywords of the PDF file syntax: 'R', 'obj', 'endobj', 'stream', 'xref',
# 'trailer' and 'startxref'.
KEYWORD_R = PSKeywordTable.intern('R')
KEYWORD_OBJ = PSKeywordTable.intern('obj')
KEYWORD_ENDOBJ = PSKeywordTable.intern('endobj')
KEYWORD_STREAM = PSKeywordTable.intern('stream')
KEYWORD_XREF = PSKeywordTable.intern('xref')
KEYWORD_TRAILER = PSKeywordTable.intern('trailer')
KEYWORD_STARTXREF = PSKeywordTable.intern('startxref')
# 32-byte string (28 BF 4E 5E ... 53 69 7A) matching the password padding
# string defined for the PDF standard security handler. How it is applied is
# outside this excerpt.
PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'


##  PDFObjRef
##
class PDFObjRef:
  
  def __init__(self, doc, objid, _):
    if objid == 0:
Exemple #2
0
    def __repr__(self):
        """Return a debugging string with the name and component count."""
        fields = (self.name, self.ncomponents)
        return "<ColorSpace: %s, ncomponents=%d>" % fields


##  Constants
##
##  Literal and keyword objects created once at module load through
##  PSLiteralTable.intern / PSKeywordTable.intern.
##  NOTE(review): interning presumably yields one shared object per name so
##  tokens can be compared by identity -- confirm in the table implementation.
##
# Name literals 'PDF', 'Text', 'Font', 'Form' and 'Image'; their use sites are
# outside this excerpt.
LITERAL_PDF = PSLiteralTable.intern("PDF")
LITERAL_TEXT = PSLiteralTable.intern("Text")
LITERAL_FONT = PSLiteralTable.intern("Font")
LITERAL_FORM = PSLiteralTable.intern("Form")
LITERAL_IMAGE = PSLiteralTable.intern("Image")
# Names of the standard encoding and of the device color spaces.
LITERAL_STANDARD_ENCODING = PSLiteralTable.intern("StandardEncoding")
LITERAL_DEVICE_GRAY = PSLiteralTable.intern("DeviceGray")
LITERAL_DEVICE_RGB = PSLiteralTable.intern("DeviceRGB")
LITERAL_DEVICE_CMYK = PSLiteralTable.intern("DeviceCMYK")
# Keywords 'BI', 'ID' and 'EI'.
# NOTE(review): presumably the inline-image operators (begin image, image
# data, end image) -- confirm against the content-stream parser.
KEYWORD_BI = PSKeywordTable.intern("BI")
KEYWORD_ID = PSKeywordTable.intern("ID")
KEYWORD_EI = PSKeywordTable.intern("EI")
# Six-number tuple.
# NOTE(review): presumably the identity transform in PDF's (a, b, c, d, e, f)
# matrix form -- confirm at the use sites.
MATRIX_IDENTITY = (1, 0, 0, 1, 0, 0)

PREDEFINED_COLORSPACE = dict(
    (name, ColorSpace(name, n))
    for (name, n) in {
        "CalRGB": 3,
        "CalGray": 1,
        "Lab": 3,
        "DeviceRGB": 3,
        "DeviceCMYK": 4,
        "DeviceGray": 1,
        "Separation": 1,
        "Indexed": 1,
Exemple #3
0
class PDFContentParser(PSStackParser):
    """Stack parser that reads a list of streams as one continuous input.

    Each entry of ``streams`` is resolved with stream_value() and its
    get_data() result is opened lazily, one stream after another, whenever
    the parser runs out of buffered input.
    """

    def __init__(self, streams):
        """Remember ``streams`` and initialise the base parser with no file.

        The stream attributes are set before the base initialiser runs, so
        they already exist if the base class calls back into seek()/fillfp().
        """
        self.streams = streams
        # Index of the next entry of self.streams to open.
        self.istream = 0
        PSStackParser.__init__(self, None)
        return

    def fillfp(self):
        """Ensure self.fp is open, loading the next stream when it is not.

        Raises:
            PSEOF: if every stream has already been consumed.
        """
        if not self.fp:
            if self.istream < len(self.streams):
                strm = stream_value(self.streams[self.istream])
                self.istream += 1
            else:
                raise PSEOF('Unexpected EOF, file truncated?')
            self.fp = StringIO(strm.get_data())
        return

    def seek(self, pos):
        """Seek to ``pos`` after making sure a stream is open."""
        self.fillfp()
        PSStackParser.seek(self, pos)
        return

    def fillbuf(self):
        """Refill self.buf when it is exhausted, moving on to later streams.

        Does nothing while unread characters remain in the buffer. A stream
        that yields no more data is dropped (self.fp = None) so that the next
        fillfp() call opens the following one; fillfp() raises PSEOF once no
        stream is left.
        """
        if self.charpos < len(self.buf): return
        while 1:
            self.fillfp()
            self.bufpos = self.fp.tell()
            self.buf = self.fp.read(self.BUFSIZ)
            if self.buf: break
            self.fp = None
        self.charpos = 0
        return

    def get_inline_data(self, pos, target='EI'):
        """Read raw inline data from ``pos`` up to the ``target`` terminator.

        The terminator is recognised only when ``target`` is immediately
        followed by a whitespace character.

        Args:
            pos: position to seek to before reading.
            target: terminator string; must be non-empty.

        Returns:
            A ``(pos, data)`` tuple. ``data`` excludes the terminator, the
            whitespace character after it, and one end-of-line marker
            (CR LF, CR or LF) directly in front of it.

        Raises:
            PSEOF: (from fillfp) if the input ends before the terminator.
        """
        self.seek(pos)
        # Number of terminator characters matched so far; it reaches
        # len(target) + 1 once the whitespace after the terminator is seen.
        i = 0
        data = ''
        while i <= len(target):
            self.fillbuf()
            if i:
                c = self.buf[self.charpos]
                data += c
                self.charpos += 1
                if i >= len(target) and c.isspace():
                    i += 1
                elif i < len(target) and c == target[i]:
                    # The bounds check matters: after a complete match that
                    # is followed by a non-space character, target[i] would
                    # be out of range.
                    i += 1
                elif c == target[0]:
                    # c ends the current attempt but may begin a new one
                    # (e.g. the second 'E' of 'EEI'). This is a one-step
                    # fallback, exact for targets such as 'EI' whose first
                    # character does not repeat.
                    i = 1
                else:
                    i = 0
            else:
                try:
                    # Jump straight to the next candidate first character.
                    j = self.buf.index(target[0], self.charpos)
                    data += self.buf[self.charpos:j + 1]
                    self.charpos = j + 1
                    i = 1
                except ValueError:
                    # No candidate in this buffer: keep it all and refill.
                    data += self.buf[self.charpos:]
                    self.charpos = len(self.buf)
        # Strip the terminator and the whitespace character after it.
        data = data[:-(len(target) + 1)]
        # Strip only the single end-of-line marker in front of the
        # terminator; CR/LF bytes inside the data belong to the data.
        data = re.sub(r'(\x0d\x0a|[\x0d\x0a])\Z', '', data)
        return (pos, data)

    def flush(self):
        """Hand every object currently on the stack to add_results()."""
        self.add_results(*self.popall())
        return

    # Keyword objects compared by identity in do_keyword().
    KEYWORD_BI = PSKeywordTable.intern('BI')
    KEYWORD_ID = PSKeywordTable.intern('ID')
    KEYWORD_EI = PSKeywordTable.intern('EI')

    def do_keyword(self, pos, token):
        """Handle a keyword token, giving BI / ID special treatment.

        BI opens an 'inline' context. ID closes it, turns the collected
        key/value objects into a dict, reads the raw data that follows and
        pushes a PDFStream followed by the EI keyword. Any other keyword is
        pushed unchanged.

        A PSTypeError raised while handling ID is re-raised only when STRICT
        is true; otherwise the inline image is silently dropped.
        """
        if token is self.KEYWORD_BI:
            # inline image within a content stream
            self.start_type(pos, 'inline')
        elif token is self.KEYWORD_ID:
            try:
                (_, objs) = self.end_type('inline')
                if len(objs) % 2 != 0:
                    raise PSTypeError('Invalid dictionary construct: %r' %
                                      objs)
                d = dict((literal_name(k), v) for (k, v) in choplist(2, objs))
                # Data starts after 'ID' and the single separator character;
                # get_inline_data() returns that start position as pos.
                (pos, data) = self.get_inline_data(pos + len('ID '))
                obj = PDFStream(d, data)
                self.push((pos, obj))
                self.push((pos, self.KEYWORD_EI))
            except PSTypeError:
                if STRICT: raise
        else:
            self.push((pos, token))
        return