#!/usr/bin/env python import zlib from lzw import lzwdecode from ascii85 import ascii85decode, asciihexdecode from runlength import rldecode from ccitt import ccittfaxdecode from psparser import PSException, PSObject from psparser import LIT, STRICT from utils import apply_png_predictor, isnumber LITERAL_CRYPT = LIT('Crypt') # Abbreviation of Filter names in PDF 4.8.6. "Inline Images" LITERALS_FLATE_DECODE = (LIT('FlateDecode'), LIT('Fl')) LITERALS_LZW_DECODE = (LIT('LZWDecode'), LIT('LZW')) LITERALS_ASCII85_DECODE = (LIT('ASCII85Decode'), LIT('A85')) LITERALS_ASCIIHEX_DECODE = (LIT('ASCIIHexDecode'), LIT('AHx')) LITERALS_RUNLENGTH_DECODE = (LIT('RunLengthDecode'), LIT('RL')) LITERALS_CCITTFAX_DECODE = (LIT('CCITTFaxDecode'), LIT('CCF')) LITERALS_DCT_DECODE = (LIT('DCTDecode'), LIT('DCT')) ## PDF Objects ## class PDFObject(PSObject): pass class PDFException(PSException): pass class PDFTypeError(PDFException):
# NOTE(review): the line above is a whitespace-collapsed fragment; its original
# line breaks are missing.  Because it starts with '#', Python currently lexes
# the whole line as a single comment, so none of its statements are executed.
#
# What the fragment contains, in order:
#   * a shebang and imports of zlib plus the decoder helpers lzwdecode,
#     ascii85decode, asciihexdecode, rldecode and ccittfaxdecode;
#   * imports of PSException, PSObject, LIT and STRICT from psparser, and of
#     apply_png_predictor and isnumber from utils;
#   * LITERAL_CRYPT, followed by one LITERALS_*_DECODE tuple per filter; each
#     tuple pairs the full filter name with a shorter second name, which the
#     embedded comment describes as the inline-image abbreviation;
#   * the empty base classes PDFObject(PSObject) and PDFException(PSException);
#   * a 'class PDFTypeError(PDFException):' header whose body is cut off, so
#     the fragment cannot be restored to valid Python without the missing text.
class PDFDestinationNotFound(PDFException):
    """PDFException subclass; by its name, signals a destination lookup miss.

    Adds no attributes or behavior of its own.
    """


class PDFEncryptionError(PDFException):
    """PDFException subclass serving as the base of encryption errors.

    Adds no attributes or behavior of its own.
    """


class PDFPasswordIncorrect(PDFEncryptionError):
    """PDFEncryptionError subclass; by its name, signals a wrong password.

    Adds no attributes or behavior of its own.
    """


# some predefined literals and keywords.
LITERAL_OBJSTM = LIT('ObjStm')
LITERAL_XREF = LIT('XRef')
LITERAL_CATALOG = LIT('Catalog')


## XRefs ##
class PDFBaseXRef(object):
    """Base class defining the default behavior of an xref object.

    Only the two methods below are visible in this fragment.
    """

    def get_trailer(self):
        """Return the trailer; the base class provides no implementation.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def get_objids(self):
        """Return the object ids known to this xref.

        The base class knows none, so a new empty list is returned.
        """
        return []

    # Must return
    #   (strmid, index, genno)
from arcfour import Arcfour
from utils import choplist, nunpack
from utils import decode_text, ObjIdRange


## Exceptions ##
class PDFSyntaxError(PDFException):
    """PDFException subclass serving as the base of syntax errors.

    Adds no attributes or behavior of its own.
    """


class PDFNoValidXRef(PDFSyntaxError):
    """PDFSyntaxError subclass; by its name, signals a missing valid xref.

    Adds no attributes or behavior of its own.
    """


class PDFNoOutlines(PDFException):
    """PDFException subclass; by its name, signals absent outlines.

    Adds no attributes or behavior of its own.
    """


class PDFDestinationNotFound(PDFException):
    """PDFException subclass; by its name, signals a destination lookup miss.

    Adds no attributes or behavior of its own.
    """


class PDFEncryptionError(PDFException):
    """PDFException subclass serving as the base of encryption errors.

    Adds no attributes or behavior of its own.
    """


class PDFPasswordIncorrect(PDFEncryptionError):
    """PDFEncryptionError subclass; by its name, signals a wrong password.

    Adds no attributes or behavior of its own.
    """


# some predefined literals and keywords.
LITERAL_OBJSTM = LIT('ObjStm')
LITERAL_XREF = LIT('XRef')
LITERAL_PAGE = LIT('Page')
LITERAL_PAGES = LIT('Pages')
LITERAL_CATALOG = LIT('Catalog')


## XRefs ##
class PDFBaseXRef(object):
    """Base class defining the default behavior of an xref object.

    Only the two methods below are visible in this fragment.
    """

    def get_trailer(self):
        """Return the trailer; the base class provides no implementation.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def get_objids(self):
        """Return the object ids known to this xref.

        The base class knows none, so a new empty list is returned.
        """
        return []
from pdfcolor import PREDEFINED_COLORSPACE
from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB
from pdfcolor import LITERAL_DEVICE_CMYK
from utils import choplist
from utils import mult_matrix, MATRIX_IDENTITY


## Exceptions ##
class PDFResourceError(PDFException):
    """PDFException subclass; by its name, signals a resource problem.

    Adds no attributes or behavior of its own.
    """


class PDFInterpreterError(PDFException):
    """PDFException subclass; by its name, signals an interpreter problem.

    Adds no attributes or behavior of its own.
    """


## Constants ##
LITERAL_PDF = LIT('PDF')
LITERAL_TEXT = LIT('Text')
LITERAL_FONT = LIT('Font')
LITERAL_FORM = LIT('Form')
LITERAL_IMAGE = LIT('Image')


## PDFTextState ##
class PDFTextState(object):
    """Container for text-state values: font, font size and spacing."""

    def __init__(self):
        """Set the text-state fields to their initial values.

        NOTE(review): only the four assignments below are visible in this
        fragment; the method may continue beyond it.
        """
        # No font is selected initially.
        self.font = None
        # Size and spacing values all start at zero.
        self.fontsize = 0
        self.charspace = 0
        self.wordspace = 0
# NOTE(review): the line below is a whitespace-collapsed fragment that is
# truncated at BOTH ends; its original line breaks are missing.  In this
# collapsed form everything after the first '#' is lexed as a comment.
#
# What the fragment contains, in order:
#   * the tail of a function whose start is not visible: an 'else: assert 0'
#     branch (note that assert statements are removed under 'python -O'), then
#     a FileUnicodeMap is created, filled by calling add_cid2unichr(gid, char)
#     for every (char, gid) pair of char2gid.iteritems() -- a Python 2 dict
#     method -- and returned;
#   * the empty exception classes PDFFontError(PDFException) and
#     PDFUnicodeNotDefined(PDFFontError);
#   * the literals LITERAL_STANDARD_ENCODING and LITERAL_TYPE1C;
#   * the start of PDFFont.__init__(descriptor, widths, default_width=None),
#     which stores descriptor and widths, resolves 'FontName' (default
#     'unknown') and converts it with literal_name() when it is a PSLiteral,
#     and reads 'Flags', 'Ascent' and 'Descent' (each defaulting to 0) through
#     int_value / num_value.  default_width is not used in the visible part;
#     the rest of the method is outside this view.
else: assert 0 # create unicode map unicode_map = FileUnicodeMap() for (char,gid) in char2gid.iteritems(): unicode_map.add_cid2unichr(gid, char) return unicode_map ## Fonts ## class PDFFontError(PDFException): pass class PDFUnicodeNotDefined(PDFFontError): pass LITERAL_STANDARD_ENCODING = LIT('StandardEncoding') LITERAL_TYPE1C = LIT('Type1C') # PDFFont class PDFFont(object): def __init__(self, descriptor, widths, default_width=None): self.descriptor = descriptor self.widths = widths self.fontname = resolve1(descriptor.get('FontName', 'unknown')) if isinstance(self.fontname, PSLiteral): self.fontname = literal_name(self.fontname) self.flags = int_value(descriptor.get('Flags', 0)) self.ascent = num_value(descriptor.get('Ascent', 0)) self.descent = num_value(descriptor.get('Descent', 0))
# NOTE(review): the line below is a whitespace-collapsed fragment; its original
# line breaks are missing.  Because it starts with '#', Python currently lexes
# the whole line as a single comment.
#
# What the fragment contains, in order:
#   * a python2 shebang and 'from psparser import LIT';
#   * the literals LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB and
#     LITERAL_DEVICE_CMYK;
#   * class PDFColorSpace, whose __init__ stores 'name' and 'ncomponents'
#     (followed by a bare 'return') and whose __repr__ formats both fields as
#     '<PDFColorSpace: %s, ncomponents=%d>';
#   * the start of PREDEFINED_COLORSPACE, a dict mapping each name to
#     PDFColorSpace(name, n), built from a name -> component-count dict
#     literal.  The fragment ends inside that literal, so the remaining entries
#     and the closing brackets are outside this view.
#!/usr/bin/env python2 from psparser import LIT ## PDFColorSpace ## LITERAL_DEVICE_GRAY = LIT('DeviceGray') LITERAL_DEVICE_RGB = LIT('DeviceRGB') LITERAL_DEVICE_CMYK = LIT('DeviceCMYK') class PDFColorSpace(object): def __init__(self, name, ncomponents): self.name = name self.ncomponents = ncomponents return def __repr__(self): return '<PDFColorSpace: %s, ncomponents=%d>' % (self.name, self.ncomponents) PREDEFINED_COLORSPACE = dict( (name, PDFColorSpace(name, n)) for (name, n) in { 'CalRGB': 3, 'CalGray': 1, 'Lab': 3, 'DeviceRGB': 3, 'DeviceCMYK': 4, 'DeviceGray': 1, 'Separation': 1, 'Indexed': 1,
# NOTE(review): the line below is a whitespace-collapsed fragment; its original
# line breaks are missing.  Because it starts with '#', Python currently lexes
# the whole line as a single comment.
#
# What the fragment contains, in order:
#   * a shebang and imports of sys, LIT (psparser), PDFObjectNotFound,
#     resolve1, int_value, list_value and dict_value (pdftypes), PDFParser
#     (pdfparser), and PDFDocument and PDFEncryptionError (pdfdocument);
#   * the literals LITERAL_PAGE and LITERAL_PAGES;
#   * the header of class PDFPage and the opening of its docstring, which
#     lists the attributes doc, pageid, attrs, contents, lastmod and
#     resources.  The fragment ends inside that docstring, so its closing
#     quotes and the class body are outside this view.
#!/usr/bin/env python import sys from psparser import LIT from pdftypes import PDFObjectNotFound from pdftypes import resolve1 from pdftypes import int_value, list_value, dict_value from pdfparser import PDFParser from pdfdocument import PDFDocument from pdfdocument import PDFEncryptionError # some predefined literals and keywords. LITERAL_PAGE = LIT('Page') LITERAL_PAGES = LIT('Pages') ## PDFPage ## class PDFPage(object): """An object that holds the information about a page. A PDFPage object is merely a convenience class that has a set of keys and values, which describe the properties of a page and point to its contents. Attributes: doc: a PDFDocument object. pageid: any Python object that can uniquely identify the page. attrs: a dictionary of page attributes. contents: a list of PDFStream objects that represents the page content. lastmod: the last modified time of the page. resources: a list of resources used by the page.