#!/usr/bin/env python
import zlib
from lzw import lzwdecode
from ascii85 import ascii85decode, asciihexdecode
from runlength import rldecode
from ccitt import ccittfaxdecode
from psparser import PSException, PSObject
from psparser import LIT, STRICT
from utils import apply_png_predictor, isnumber

LITERAL_CRYPT = LIT('Crypt')

# Abbreviation of Filter names in PDF 4.8.6. "Inline Images"
LITERALS_FLATE_DECODE = (LIT('FlateDecode'), LIT('Fl'))
LITERALS_LZW_DECODE = (LIT('LZWDecode'), LIT('LZW'))
LITERALS_ASCII85_DECODE = (LIT('ASCII85Decode'), LIT('A85'))
LITERALS_ASCIIHEX_DECODE = (LIT('ASCIIHexDecode'), LIT('AHx'))
LITERALS_RUNLENGTH_DECODE = (LIT('RunLengthDecode'), LIT('RL'))
LITERALS_CCITTFAX_DECODE = (LIT('CCITTFaxDecode'), LIT('CCF'))
LITERALS_DCT_DECODE = (LIT('DCTDecode'), LIT('DCT'))


##  PDF Objects
##
class PDFObject(PSObject):
    pass

class PDFException(PSException):
    pass

class PDFTypeError(PDFException):
예제 #2
0

class PDFDestinationNotFound(PDFException):
    pass


class PDFEncryptionError(PDFException):
    pass


class PDFPasswordIncorrect(PDFEncryptionError):
    pass


# some predefined literals and keywords.
LITERAL_OBJSTM = LIT('ObjStm')
LITERAL_XREF = LIT('XRef')
LITERAL_CATALOG = LIT('Catalog')


##  XRefs
##
class PDFBaseXRef(object):
    def get_trailer(self):
        raise NotImplementedError

    def get_objids(self):
        return []

    # Must return
    #     (strmid, index, genno)
예제 #3
0
파일: pdfparser.py 프로젝트: xarg/pdfminer
from arcfour import Arcfour
from utils import choplist, nunpack
from utils import decode_text, ObjIdRange


##  Exceptions
##
class PDFSyntaxError(PDFException): pass
class PDFNoValidXRef(PDFSyntaxError): pass
class PDFNoOutlines(PDFException): pass
class PDFDestinationNotFound(PDFException): pass
class PDFEncryptionError(PDFException): pass
class PDFPasswordIncorrect(PDFEncryptionError): pass

# some predefined literals and keywords.
LITERAL_OBJSTM = LIT('ObjStm')
LITERAL_XREF = LIT('XRef')
LITERAL_PAGE = LIT('Page')
LITERAL_PAGES = LIT('Pages')
LITERAL_CATALOG = LIT('Catalog')


##  XRefs
##
class PDFBaseXRef(object):

    def get_trailer(self):
        raise NotImplementedError

    def get_objids(self):
        return []
예제 #4
0
from pdfcolor import PREDEFINED_COLORSPACE
from pdfcolor import LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB
from pdfcolor import LITERAL_DEVICE_CMYK
from utils import choplist
from utils import mult_matrix, MATRIX_IDENTITY


##  Exceptions
##
class PDFResourceError(PDFException): pass
class PDFInterpreterError(PDFException): pass


##  Constants
##
LITERAL_PDF = LIT('PDF')
LITERAL_TEXT = LIT('Text')
LITERAL_FONT = LIT('Font')
LITERAL_FORM = LIT('Form')
LITERAL_IMAGE = LIT('Image')


##  PDFTextState
##
class PDFTextState(object):

    def __init__(self):
        self.font = None
        self.fontsize = 0
        self.charspace = 0
        self.wordspace = 0
예제 #5
0
            else:
                assert 0
        # create unicode map
        unicode_map = FileUnicodeMap()
        for (char,gid) in char2gid.iteritems():
            unicode_map.add_cid2unichr(gid, char)
        return unicode_map


##  Fonts
##

class PDFFontError(PDFException): pass
class PDFUnicodeNotDefined(PDFFontError): pass

LITERAL_STANDARD_ENCODING = LIT('StandardEncoding')
LITERAL_TYPE1C = LIT('Type1C')


# PDFFont
class PDFFont(object):

    def __init__(self, descriptor, widths, default_width=None):
        self.descriptor = descriptor
        self.widths = widths
        self.fontname = resolve1(descriptor.get('FontName', 'unknown'))
        if isinstance(self.fontname, PSLiteral):
            self.fontname = literal_name(self.fontname)
        self.flags = int_value(descriptor.get('Flags', 0))
        self.ascent = num_value(descriptor.get('Ascent', 0))
        self.descent = num_value(descriptor.get('Descent', 0))
예제 #6
0
#!/usr/bin/env python2
from psparser import LIT

##  PDFColorSpace
##
LITERAL_DEVICE_GRAY = LIT('DeviceGray')
LITERAL_DEVICE_RGB = LIT('DeviceRGB')
LITERAL_DEVICE_CMYK = LIT('DeviceCMYK')


class PDFColorSpace(object):
    def __init__(self, name, ncomponents):
        self.name = name
        self.ncomponents = ncomponents
        return

    def __repr__(self):
        return '<PDFColorSpace: %s, ncomponents=%d>' % (self.name,
                                                        self.ncomponents)


PREDEFINED_COLORSPACE = dict(
    (name, PDFColorSpace(name, n)) for (name, n) in {
        'CalRGB': 3,
        'CalGray': 1,
        'Lab': 3,
        'DeviceRGB': 3,
        'DeviceCMYK': 4,
        'DeviceGray': 1,
        'Separation': 1,
        'Indexed': 1,
예제 #7
0
#!/usr/bin/env python
import sys
from psparser import LIT
from pdftypes import PDFObjectNotFound
from pdftypes import resolve1
from pdftypes import int_value, list_value, dict_value
from pdfparser import PDFParser
from pdfdocument import PDFDocument
from pdfdocument import PDFEncryptionError

# some predefined literals and keywords.
LITERAL_PAGE = LIT('Page')
LITERAL_PAGES = LIT('Pages')


##  PDFPage
##
class PDFPage(object):
    """An object that holds the information about a page.

    A PDFPage object is merely a convenience class that has a set
    of keys and values, which describe the properties of a page
    and point to its contents.

    Attributes:
      doc: a PDFDocument object.
      pageid: any Python object that can uniquely identify the page.
      attrs: a dictionary of page attributes.
      contents: a list of PDFStream objects that represents the page content.
      lastmod: the last modified time of the page.
      resources: a list of resources used by the page.