コード例 #1
0
ファイル: buildxobj.py プロジェクト: zhzhzoo/pdfrw
def pagexobj(page, viewinfo=ViewInfo(), allow_compressed=True):
    ''' Build and return a Form XObject for a view of a page.

        The view is described by viewinfo (documented as defaulting
        to the entire page).  The page's content stream(s) are reduced
        to a single stream object which is passed, together with the
        inherited resources, the rectangles computed by getrects() and
        the combined page/view rotation, to _cache_xobj().

        When allow_compressed is false, the content object is also
        asserted to carry exactly one attribute.
    '''
    inherited = page.inheritable
    resources = inherited.Resources
    page_rotation = get_rotation(inherited.Rotate)
    mbox, bbox = getrects(inherited, viewinfo, page_rotation)
    rotation = page_rotation + get_rotation(viewinfo.rotate)

    raw = page.Contents
    if not isinstance(raw, PdfArray):
        contents = raw
    elif len(raw) == 1:
        contents = raw[0]
    else:
        # Several streams: decode each of them, then glue the pieces
        # together with newlines inside one fresh stream object.
        parts = list(raw)
        uncompress(parts)
        contents = PdfDict(stream='\n'.join(part.stream for part in parts))

    # The declared length has to agree with the data we hold, i.e.
    # all the filters must already have been executed.
    assert int(contents.Length) == len(contents.stream)
    if not allow_compressed:
        # Length must be the only attribute left on the stream
        assert sum(1 for _ in contents.iteritems()) == 1
    return _cache_xobj(contents, resources, mbox, bbox, rotation)
コード例 #2
0
def pagexobj(page, viewinfo=ViewInfo(), allow_compressed=True):
    ''' Build and return a Form XObject for a view of a page.

        The view is described by viewinfo (documented as defaulting
        to the entire page).  The page's content stream(s) are reduced
        to a single stream object which is passed, together with the
        inherited resources, the rectangles computed by getrects() and
        the combined page/view rotation, to _cache_xobj().

        When allow_compressed is false, the content object is also
        asserted to carry exactly one attribute.
    '''
    inherited = page.inheritable
    resources = inherited.Resources
    page_rotation = get_rotation(inherited.Rotate)
    mbox, bbox = getrects(inherited, viewinfo, page_rotation)
    rotation = page_rotation + get_rotation(viewinfo.rotate)

    raw = page.Contents
    if not isinstance(raw, PdfArray):
        contents = raw
    elif len(raw) == 1:
        contents = raw[0]
    else:
        # Several streams: decode each of them, then glue the pieces
        # together with newlines inside one fresh stream object.
        parts = list(raw)
        uncompress(parts)
        contents = PdfDict(stream='\n'.join(part.stream for part in parts))

    # The declared length has to agree with the data we hold, i.e.
    # all the filters must already have been executed.
    assert int(contents.Length) == len(contents.stream)
    if not allow_compressed:
        # Length must be the only attribute left on the stream
        assert sum(1 for _ in contents.iteritems()) == 1
    return _cache_xobj(contents, resources, mbox, bbox, rotation)
コード例 #3
0
def find_images(file):
    """Yield JSON payloads carried by height-1 image objects of a PDF.

    For every page of ``file`` (opened with PdfReader), each object
    returned by find_objects() whose Subtype is /Image and whose /Height
    equals '1' is passed through uncompress() with ``leave_raw=True``;
    its stream is then decoded as UTF-16BE and parsed as JSON.

    Objects for which any of those steps fails are skipped, so only
    successfully parsed payloads are yielded.
    """
    for page in PdfReader(file).pages:
        for obj in find_objects(page):
            if not (obj.Subtype == PdfName.Image and obj['/Height'] == '1'):
                continue
            try:
                uncompress([obj], leave_raw=True)
                payload = json.loads(obj.stream.decode('utf-16be'))
            except Exception:
                # Deliberate best effort: a matching image that does not
                # hold a decodable JSON payload is simply ignored.
                continue
            # Yield outside the try block so that an exception thrown
            # into the generator by its consumer is not swallowed.
            yield payload
コード例 #4
0
    def parse_xref_stream(self,
                          source,
                          int=int,
                          range=range,
                          enumerate=enumerate,
                          islice=itertools.islice,
                          defaultdict=collections.defaultdict,
                          hexlify=binascii.hexlify):
        ''' Parse (one of) the cross-reference file section(s)

            This variant handles a cross-reference *stream* object.
            The next three tokens from source must be a digit token,
            'obj' and '<<'; the dictionary that follows must have
            Type /XRef.  The stream is read and decompressed, then
            decoded as records of three integer fields whose byte
            widths come from /W.

            Records of type 1 (or with no type field) that have a
            non-zero second field are recorded in source.obj_offsets;
            records of type 2 are collected per containing object
            stream and stored on obj.private.object_streams.

            The keyword parameters other than source only bind
            builtins and library functions as local names.

            Returns the parsed /XRef stream object.  Problems are
            reported through source.exception().
        '''
        def readint(s, lengths):
            # Endless generator over s: cycles through the field widths
            # in lengths, yielding each field as an integer (bytes are
            # hex-encoded and parsed base 16, i.e. big-endian), or None
            # for a zero-width field.
            offset = 0
            for length in itertools.cycle(lengths):
                next = offset + length
                yield int(hexlify(s[offset:next]), 16) if length else None
                offset = next

        setdefault = source.obj_offsets.setdefault
        next = source.next
        # check for xref stream object
        # NOTE(review): only three tokens are checked here, so the
        # object number was presumably consumed by the caller -- confirm.
        objid = source.multiple(3)
        ok = len(objid) == 3
        ok = ok and objid[0].isdigit()
        ok = ok and objid[1] == 'obj'
        ok = ok and objid[2] == '<<'
        if not ok:
            source.exception('Expected xref stream start')
        obj = self.readdict(source)
        if obj.Type != PdfName.XRef:
            source.exception('Expected dict type of /XRef')
        tok = next()
        self.readstream(obj, self.findstream(obj, tok, source), source, True)
        # Remember the stream as read, to detect below whether
        # uncompress() replaced it.
        old_strm = obj.stream
        if not uncompress([obj], True):
            source.exception('Could not decompress Xref stream')
        stream = obj.stream
        # Fix for issue #76 -- goofy compressed xref stream
        # that is NOT ACTUALLY COMPRESSED
        # (the stream object is unchanged, so run it through
        # convert_store() instead)
        stream = stream if stream is not old_strm else convert_store(old_strm)
        # /Index holds (first object number, count) pairs; without it a
        # single run starting at 0 with /Size entries is used.
        num_pairs = obj.Index or PdfArray(['0', obj.Size])
        num_pairs = [int(x) for x in num_pairs]
        num_pairs = zip(num_pairs[0::2], num_pairs[1::2])
        entry_sizes = [int(x) for x in obj.W]
        if len(entry_sizes) != 3:
            source.exception('Invalid entry size')
        object_streams = defaultdict(list)
        # One generator is shared by all runs, so reading continues
        # where the previous run stopped.
        get = readint(stream, entry_sizes)
        for objnum, size in num_pairs:
            for cnt in range(size):
                xtype, p1, p2 = islice(get, 3)
                if xtype in (1, None):
                    # Type 1 (or absent type field): p1 is recorded
                    # under (objnum, p2 or 0) unless that key is
                    # already present or p1 is zero/None.
                    if p1:
                        setdefault((objnum, p2 or 0), p1)
                elif xtype == 2:
                    # Type 2: group (objnum, p2) under the value of p1
                    object_streams[p1].append((objnum, p2))
                objnum += 1

        obj.private.object_streams = object_streams
        return obj
コード例 #5
0
    def load_stream_objects(self, object_streams):
        ''' Load the objects packed inside the given object streams.

            Each item obtained by iterating object_streams is looked up
            with findindirect(num, 0) and must be an /ObjStm.  The
            containers are decrypted when crypt filters are configured,
            decompressed, and every object listed in a container's
            header is parsed and stored in self.indirect_objects under
            the key (object number, 0).
        '''
        # Collect the container objects themselves
        containers = []
        for stream_num in object_streams:
            container = self.findindirect(stream_num, 0).real_value()
            assert container.Type == '/ObjStm'
            containers.append(container)

        if not containers:
            return

        # Decrypt first, when crypt filters are set ...
        if self.crypt_filters is not None:
            crypt.decrypt_objects(containers, self.stream_crypt_filter,
                                  self.crypt_filters)

        # ... then decompress
        uncompress(containers)

        for container in containers:
            tokens = PdfTokens(container.stream, 0, False)
            read_token = tokens.next
            first = int(container.First)

            # Header: (object number, relative offset) token pairs,
            # read for as long as the tokenizer is before /First
            entries = []
            while tokens.floc < first:
                objnum = int(read_token())
                entries.append((objnum, first + int(read_token())))

            for objnum, position in entries:
                # Read the object, handing over to the special code
                # if its first token starts an array or dictionary
                tokens.floc = position
                value = read_token()
                handler = self.special.get(value)
                if handler is not None:
                    value = handler(tokens)

                key = (objnum, 0)
                self.indirect_objects[key] = value
                if key in self.deferred_objects:
                    self.deferred_objects.remove(key)

                # Mark the object as indirect
                value.indirect = key
コード例 #6
0
def make_popup(page, rect, popupname, popup, code):
    """Attach a clickable popup to ``page``.

    A link annotation covering ``rect`` is appended to the page's
    annotations.  Its action is a SetOCGState action whose state array
    lists the second element of ``code`` after /OFF and the first
    element after /ON.  ``popup`` is registered as an XObject resource
    and drawn from the page content stream inside a marked section
    controlled by a newly created OCMD.
    """
    from pdfrw import PdfDict, PdfArray, PdfName
    from pdfrw.uncompress import uncompress

    codeword_on, codeword_off = code

    # Action triggered by the link: OFF list first, then ON list
    ocg_state = PdfArray([PdfName.OFF] + codeword_off +
                         [PdfName.ON] + codeword_on)
    show_action = PdfDict(S=PdfName.SetOCGState, State=ocg_state)

    link = PdfDict(indirect=True,
                   Type=PdfName.Annot,
                   H=PdfName.I,
                   Subtype=PdfName.Link,
                   A=show_action,
                   Rect=rect)

    # Coloured link when a colour is configured, no border otherwise
    link_color = pdf_popup_config['popuplinkcolor']
    if link_color:
        link.C = PdfArray(link_color)
    else:
        link.Border = [0, 0, 0]

    page.Annots.append(link)

    ocmd = PdfDict(Type=PdfName.OCMD, OCGs=codeword_on, P=PdfName.AllOn)
    popup_pdfname = '/SPopup' + popupname
    ocmd_pdfname = '/SPopupOCMD{}'.format(popup_unique_id())

    # Create the resource dictionaries on demand, then register both
    resources = page.Resources
    if not resources.Properties:
        resources.Properties = PdfDict()
    if not resources.XObject:
        resources.XObject = PdfDict()
    resources.XObject[popup_pdfname] = popup
    resources.Properties[ocmd_pdfname] = ocmd

    contents = page.Contents
    if contents.Filter:
        # Important: decode first, otherwise the text appended below
        # would be added as plain text to a compressed stream
        uncompress([contents])

    draw_ops = "q /OC {ocmd} BDC 1 0 0 1 {x} {y} cm {popup} Do EMC Q\n".format(
        x=rect[0], y=float(rect[1]) - popup.BBox[3],
        ocmd=ocmd_pdfname, popup=popup_pdfname)
    contents.stream += draw_ops
コード例 #7
0
    def load_stream_objects(self, object_streams):
        ''' Load the objects stored inside the given object streams.

            The keys of object_streams are looked up with
            findindirect(num, 0) and must be /ObjStm objects.  After
            decompression, each container's header of "object number /
            offset" token pairs is read, and every listed object is
            parsed and stored in self.indirect_objects under the key
            (object number, 0).
        '''
        # Fetch the container objects
        containers = []
        for stream_num in object_streams.iterkeys():
            container = self.findindirect(stream_num, 0).real_value()
            assert container.Type == '/ObjStm'
            containers.append(container)

        if not containers:
            return

        uncompress(containers)
        for container in containers:
            tokens = PdfTokens(container.stream, 0, False)
            read_token = tokens.next
            base = int(container.First)

            # Header: number/offset pairs, read until the first token
            # that is not made of digits only.
            # NOTE(review): a stream whose first stored object is a bare
            # integer would presumably be read as more header -- confirm.
            positions = {}
            token = read_token()
            while token.isdigit():
                relative = int(read_token())
                positions[int(token)] = base + relative
                token = read_token()

            for objnum, position in positions.iteritems():
                # Read the object, handing over to the special code
                # if its first token starts an array or dictionary
                tokens.floc = position
                value = read_token()
                handler = self.special.get(value)
                if handler is not None:
                    value = handler(tokens)

                key = (objnum, 0)
                self.indirect_objects[key] = value
                if key in self.deferred_objects:
                    self.deferred_objects.remove(key)

                # Mark the object as indirect
                value.indirect = key
コード例 #8
0
ファイル: pdfreader.py プロジェクト: zhzhzoo/pdfrw
    def load_stream_objects(self, object_streams):
        ''' Load the objects stored inside the given object streams.

            The keys of object_streams are looked up with
            findindirect(num, 0) and must be /ObjStm objects.  After
            decompression, each container's header of "object number /
            offset" token pairs is read, and every listed object is
            parsed and stored in self.indirect_objects under the key
            (object number, 0).
        '''
        # Fetch the container objects
        containers = []
        for stream_num in object_streams.iterkeys():
            container = self.findindirect(stream_num, 0).real_value()
            assert container.Type == '/ObjStm'
            containers.append(container)

        if not containers:
            return

        uncompress(containers)
        for container in containers:
            tokens = PdfTokens(container.stream, 0, False)
            read_token = tokens.next
            base = int(container.First)

            # Header: number/offset pairs, read until the first token
            # that is not made of digits only.
            # NOTE(review): a stream whose first stored object is a bare
            # integer would presumably be read as more header -- confirm.
            positions = {}
            token = read_token()
            while token.isdigit():
                relative = int(read_token())
                positions[int(token)] = base + relative
                token = read_token()

            for objnum, position in positions.iteritems():
                # Read the object, handing over to the special code
                # if its first token starts an array or dictionary
                tokens.floc = position
                value = read_token()
                handler = self.special.get(value)
                if handler is not None:
                    value = handler(tokens)

                key = (objnum, 0)
                self.indirect_objects[key] = value
                if key in self.deferred_objects:
                    self.deferred_objects.remove(key)

                # Mark the object as indirect
                value.indirect = key
コード例 #9
0
 def uncompress(self):
     ''' Call read_all(), then pass every value of indirect_objects
         to the module-level uncompress() function.
     '''
     self.read_all()
     loaded = self.indirect_objects.itervalues()
     uncompress(loaded)
コード例 #10
0
ファイル: pdfreader.py プロジェクト: deepsign/novela-workflow
 def uncompress(self):
     ''' Call read_all(), then pass every value of indirect_objects
         to the module-level uncompress() function.
     '''
     self.read_all()
     loaded = self.indirect_objects.itervalues()
     uncompress(loaded)
コード例 #11
0
    def parsexref(self, source, int=int, range=range):
        ''' Parse (one of) the cross-reference file section(s)

            Two layouts are recognised at the current position of
            source:

            * A cross-reference stream object.  Its entries are decoded
              using the field widths in /W; type 1 entries with a
              non-zero offset are recorded in source.obj_offsets and
              source.all_offsets, type 2 entries are grouped by
              containing object stream and passed to
              load_stream_objects().  The stream object is returned.
            * A classic "xref" table.  Entries marked 'n' with a
              non-zero offset are recorded the same way and the
              trailer dictionary following the table is returned.  If
              token-by-token parsing fails, the table is re-read line
              by line before giving up.

            Problems are reported through source.exception().  The int
            and range parameters only bind the builtins as local names.
        '''
        def _pairs(array):
            # Yield (first object number, entry count) for each
            # subsection listed in the /Index array.
            i = 0
            while 1:
                yield int(array[i]), int(array[i + 1])
                i += 2
                if (i + 1) >= len(array):
                    break

        def convert_to_int(d, size):
            # Interpret up to 8 raw bytes as a big-endian integer by
            # left-padding them to the 8 bytes the struct format needs.
            if size > 8:
                source.exception('Invalid size in convert_to_int')
            d = '\x00\x00\x00\x00\x00\x00\x00\x00' + d
            d = d[-8:]
            return struct.unpack('>q', d)[0]

        def read_trailer():
            # The trailer keyword must be followed by a dictionary.
            tok = next()
            if tok != '<<':
                source.exception('Expected "<<" starting catalog')
            return self.readdict(source)

        setdefault = source.obj_offsets.setdefault
        add_offset = source.all_offsets.append
        next = source.next
        tok = next()
        if tok.isdigit():
            # check for xref stream object
            objid = source.multiple(2)
            ok = len(objid) == 2
            ok = ok and objid[0].isdigit()
            ok = ok and objid[1] == 'obj'
            if ok:
                next()  # start of dict
                obj = self.readdict(source)
                assert obj.Type == '/XRef'
                tok = next()
                end = source.floc + int(obj.Length)
                self.readstream(obj, self.findstream(obj, tok, source), source)
                uncompress([obj])
                num_pairs = obj.Index or PdfArray(['0', obj.Size])
                entry_sizes = [int(x) for x in obj.W]
                object_streams = {}
                # The entries of all subsections are stored back to back
                # in the stream, so the read position has to carry over
                # from one /Index pair to the next instead of restarting
                # at 0 for every subsection.
                stream_offset = 0
                for num, size in _pairs(num_pairs):
                    cnt = 0
                    while cnt < size:
                        for i, width in enumerate(entry_sizes):
                            d = obj.stream[stream_offset:stream_offset + width]
                            stream_offset += width
                            di = convert_to_int(d, width)
                            if i == 0:
                                xref_type = di
                                # A zero-width type field means type 1
                                if xref_type == 0 and entry_sizes[0] == 0:
                                    xref_type = 1
                            elif i == 1:
                                if xref_type == 1:
                                    offset = di
                                elif xref_type == 2:
                                    objnum = di
                            elif i == 2:
                                if xref_type == 1:
                                    generation = di
                                elif xref_type == 2:
                                    obstr_idx = di
                        if xref_type == 1 and offset != 0:
                            setdefault((num, generation), offset)
                            add_offset(offset)
                        elif xref_type == 2:
                            object_streams.setdefault(objnum, []).append(
                                obstr_idx)
                        cnt += 1
                        num += 1

                self.load_stream_objects(object_streams)

                source.floc = end
                endit = source.multiple(2)
                if endit != ['endstream', 'endobj']:
                    source.exception('Expected endstream endobj')
                return obj
            else:
                source.exception('Expected xref stream')

        elif tok == 'xref':
            # plain xref table
            start = source.floc
            try:
                while 1:
                    tok = next()
                    if tok == 'trailer':
                        return read_trailer()
                    startobj = int(tok)
                    for objnum in range(startobj, startobj + int(next())):
                        offset = int(next())
                        generation = int(next())
                        inuse = next()
                        if inuse == 'n':
                            if offset != 0:
                                setdefault((objnum, generation), offset)
                                add_offset(offset)
                        elif inuse != 'f':
                            raise ValueError
            except Exception:
                # Any failure here just means the table is not strictly
                # formatted; fall through to the lenient parser below.
                pass
            try:
                # Table formatted incorrectly.
                # See if we can figure it out anyway.
                end = source.fdata.rindex('trailer', start)
                table = source.fdata[start:end].splitlines()
                for line in table:
                    tokens = line.split()
                    if len(tokens) == 2:
                        objnum = int(tokens[0])
                    elif len(tokens) == 3:
                        offset, generation, inuse = \
                            int(tokens[0]), int(tokens[1]), tokens[2]
                        if offset != 0 and inuse == 'n':
                            setdefault((objnum, generation), offset)
                            add_offset(offset)
                        objnum += 1
                    elif tokens:
                        log.error('Invalid line in xref table: %s' %
                                  repr(line))
                        raise ValueError
                log.warning('Badly formatted xref table')
                source.floc = end
                next()
            except Exception:
                source.floc = start
                source.exception('Invalid table format')

            return read_trailer()
        else:
            source.exception('Expected "xref" keyword or xref stream object')
コード例 #12
0
def do_apply_ocg(basepage, rmpage, i, uses_base_pdf, ocgprop, annotations):
    """Split a page's content stream into optional content groups (OCGs).

    Creates a 'Page N' OCG for the page, plus a 'Background' OCG around
    the base PDF content (when ``uses_base_pdf`` is true) or a 'Template'
    OCG around the template part of ``rmpage`` (otherwise), and one OCG
    per annotation layer.  The content streams of ``basepage`` and
    ``rmpage`` are rewritten in place with BDC/EMC markers and
    recompressed, and every new OCG is registered with ``ocgprop``.

    Args:
        basepage: Page of the base PDF; only modified when
            ``uses_base_pdf`` is true.
        rmpage: Generated page whose content stream and resources are
            tagged.
        i: Zero-based page index, used for the OCG name and to select
            ``annotations[i]``.
        uses_base_pdf: Whether the document is layered on a base PDF.
        ocgprop: Object whose ``OCGs`` and ``D.Order`` arrays receive
            the new groups.
        annotations: Indexable as ``annotations[i][l]``; element 0 of
            each entry is used as the layer name.

    Returns:
        The PdfArray of OCGs nested under this page's OCG in the order
        tree.
    """
    ocgpage = IndirectPdfDict(Type=PdfName('OCG'), Name='Page ' + str(i + 1))
    ocgprop.OCGs.append(ocgpage)

    # The Order dict is a Page, followed by Inner
    ocgorderinner = PdfArray()

    # Add Template OCG layer
    # If this uses a basepdf, the template is located
    # elsewhere.

    # If using a basepdf, assign its stream as a
    # 'Background' layer under this page. When the page
    # primary OCG is disabled, the background will
    # remain, making it easy to disable all annotations.
    if uses_base_pdf:
        ocgorigdoc = IndirectPdfDict(Type=PdfName('OCG'), Name='Background')
        ocgprop.OCGs.append(ocgorigdoc)
        ocgorderinner.append(ocgorigdoc)

        uncompress.uncompress([basepage.Contents])
        stream = basepage.Contents.stream
        stream = '/OC /ocgorigdoc BDC\n' \
            + stream \
            + 'EMC\n'
        basepage.Contents.stream = stream
        compress.compress([basepage.Contents])

        if '/Properties' in basepage.Resources:
            props = basepage.Resources.Properties
        else:
            props = PdfDict()
        props.ocgorigdoc = ocgorigdoc
        basepage.Resources.Properties = props

    # If not using a basepdf, assign the rmpage's stream
    # as a 'Template' layer under this page. It will be
    # affected by disabling the primary Page OCG (which
    # by itself is kind of useless for exported
    # notebooks).

    # Regardless of using a basepdf or not, put the
    # rmpage layers into their own OCGs.

    # If the template has an XObject, we want to skip
    # the first one. This happens when the template
    # contains a PNG. Question--what happens when the
    # template contains more than one PNG? How do we
    # detect all of those?

    template_xobj_keys = []
    vector_layers = []
    uncompress.uncompress([rmpage.Contents])
    if uses_base_pdf:
        # The entire thing is the page ocg
        stream = '/OC /ocgpage BDC\n'
        stream += rmpage.Contents.stream
        stream += 'EMC\n'
        rmpage.Contents.stream = stream
    else:
        stream = rmpage.Contents.stream
        # Mark the template ocg separate from page ocg
        template_endpos = 0
        page_inatpos = 0
        findkey = '1 w 2 J 2 j []0  d\nq\n'
        # Finds only the first instance, which should be
        # for the template.
        findloc = stream.find(findkey)
        if findloc < 0:
            # May be a vector, which we stick a marker
            # in for.
            # ?? Why is this a half-point off ??
            findkey = '799.500000 85 l\n'
            m = re.search(findkey, rmpage.Contents.stream)
            if m:
                findloc = m.start()
        # NOTE(review): a marker found at offset 0 is ignored by this
        # test; confirm whether '>= 0' was intended.
        if findloc > 0:
            template_endpos = findloc + len(findkey)
            # Add vector template OCG
            stream = '/OC /ocgtemplate BDC\n'
            stream += rmpage.Contents.stream[:template_endpos]
            stream += 'EMC\n'
            page_inatpos = len(stream)
            stream += rmpage.Contents.stream[template_endpos:]
            # Save stream
            rmpage.Contents.stream = stream

        # Add template ocg
        ocgtemplate = IndirectPdfDict(Type=PdfName('OCG'), Name='Template')
        ocgprop.OCGs.append(ocgtemplate)
        ocgorderinner.append(ocgtemplate)

        # If a template (which is SVG) has embedded PNG
        # images, those appear as XObjects. This will
        # mess up the layer order, so we will ignore
        # them later.
        template_xobj_keys = \
            re.findall(r'(\/Im[0-9]+)\s',
                        stream[:template_endpos])

        # Page ocg
        stream = rmpage.Contents.stream[:page_inatpos]
        stream += '/OC /ocgpage BDC\n'
        stream += rmpage.Contents.stream[page_inatpos:]
        stream += 'EMC\n'
        # Save stream
        rmpage.Contents.stream = stream

    # Find all other vector layers using the magic
    # point (DocumentPageLayer.render_to_painter()).
    # ?? Why is this a half-point off ??
    while True:
        m = re.search('420.500000 69 m\n', rmpage.Contents.stream)
        if not m:
            break
        layerid = 'ocglayer{}'.format(len(vector_layers) + 1)
        stream = rmpage.Contents.stream[:m.start()]
        if vector_layers:
            # close previous layer
            stream += 'EMC\n'
        stream += '/OC /{} BDC\n'.format(layerid)
        stream += rmpage.Contents.stream[m.end():]
        vector_layers.append(layerid)
        rmpage.Contents.stream = stream
    # If we added vector layers, have to end the
    # first one.
    if vector_layers:
        stream = rmpage.Contents.stream + 'EMC\n'
        rmpage.Contents.stream = stream

    # Done--recompress the stream.
    compress.compress([rmpage.Contents])

    # There shouldn't be any Properties there since we
    # generated the rmpage ourselves, so don't bother
    # checking.
    rmpage.Resources.Properties = PdfDict(ocgpage=ocgpage)
    if not uses_base_pdf:
        rmpage.Resources.Properties.ocgtemplate = ocgtemplate

    # Add individual OCG layers (Bitmap)
    was_vector = True
    for n, key in enumerate(rmpage.Resources.XObject):
        if str(key) in template_xobj_keys:
            continue
        was_vector = False
        l = n - len(template_xobj_keys)
        # This would indicate a bug in the handling of a
        # notebook.
        try:
            layer = annotations[i][l]
        except (LookupError, TypeError):
            log.error(
                'could not associate XObject with layer: (i, l) ({}, {})'.
                format(i, l))
            log.error(str(annotations))
            # .format() must be applied to the message itself;
            # log.error() returns None, so chaining .format() onto the
            # call raised AttributeError inside this handler.
            log.error('document: {} ()'.format('uuid', 'self.visible_name'))
            continue
        layername = layer[0]
        ocg = IndirectPdfDict(Type=PdfName('OCG'), Name=layername)
        ocgprop.OCGs.append(ocg)
        ocgorderinner.append(ocg)
        rmpage.Resources.XObject[key].OC = ocg

    # Add individual OCG layers (Vector)
    if was_vector:
        for l, layerid in enumerate(vector_layers):
            # This would indicate a bug in the handling of a
            # notebook.
            try:
                layer = annotations[i][l]
            except (LookupError, TypeError):
                log.error(
                    'could not associate layerid with layer: (i, l, layerid) ({}, {}, {})'
                    .format(i, l, layerid))
                # Same fix as above: format the string, not the
                # return value of log.error().
                log.error('document: {} ()'.format('uuid',
                                                   'self.visible_name'))
                log.error(str(annotations))
                continue
            layername = layer[0]
            ocg = IndirectPdfDict(Type=PdfName('OCG'), Name=layername)
            ocgprop.OCGs.append(ocg)
            ocgorderinner.append(ocg)
            rmpage.Resources.Properties[PdfName(layerid)] = \
                ocg

    # Add order of OCGs to primary document
    ocgprop.D.Order.append(ocgpage)
    ocgprop.D.Order.append(ocgorderinner)

    return ocgorderinner
コード例 #13
0
ファイル: pdfreader.py プロジェクト: zhzhzoo/pdfrw
    def parsexref(self, source, int=int, range=range):
        ''' Parse (one of) the cross-reference file section(s)

            Two layouts are recognised at the current position of
            source:

            * A cross-reference stream object.  Its entries are decoded
              using the field widths in /W; type 1 entries with a
              non-zero offset are recorded in source.obj_offsets and
              source.all_offsets, type 2 entries are grouped by
              containing object stream and passed to
              load_stream_objects().  The stream object is returned.
            * A classic "xref" table.  Entries marked 'n' with a
              non-zero offset are recorded the same way and the
              trailer dictionary following the table is returned.  If
              token-by-token parsing fails, the table is re-read line
              by line before giving up.

            Problems are reported through source.exception().  The int
            and range parameters only bind the builtins as local names.
        '''

        def _pairs(array):
            # Yield (first object number, entry count) for each
            # subsection listed in the /Index array.
            i = 0
            while 1:
                yield int(array[i]), int(array[i + 1])
                i += 2
                if (i + 1) >= len(array):
                    break

        def convert_to_int(d, size):
            # Interpret up to 8 raw bytes as a big-endian integer by
            # left-padding them to the 8 bytes the struct format needs.
            if size > 8:
                source.exception('Invalid size in convert_to_int')
            d = '\x00\x00\x00\x00\x00\x00\x00\x00' + d
            d = d[-8:]
            return struct.unpack('>q', d)[0]

        def read_trailer():
            # The trailer keyword must be followed by a dictionary.
            tok = next()
            if tok != '<<':
                source.exception('Expected "<<" starting catalog')
            return self.readdict(source)

        setdefault = source.obj_offsets.setdefault
        add_offset = source.all_offsets.append
        next = source.next
        tok = next()
        if tok.isdigit():
            # check for xref stream object
            objid = source.multiple(2)
            ok = len(objid) == 2
            ok = ok and objid[0].isdigit()
            ok = ok and objid[1] == 'obj'
            if ok:
                next()  # start of dict
                obj = self.readdict(source)
                assert obj.Type == '/XRef'
                tok = next()
                end = source.floc + int(obj.Length)
                self.readstream(obj, self.findstream(obj, tok, source), source)
                uncompress([obj])
                num_pairs = obj.Index or PdfArray(['0', obj.Size])
                entry_sizes = [int(x) for x in obj.W]
                object_streams = {}
                # The entries of all subsections are stored back to back
                # in the stream, so the read position has to carry over
                # from one /Index pair to the next instead of restarting
                # at 0 for every subsection.
                stream_offset = 0
                for num, size in _pairs(num_pairs):
                    cnt = 0
                    while cnt < size:
                        for i, width in enumerate(entry_sizes):
                            d = obj.stream[stream_offset:stream_offset + width]
                            stream_offset += width
                            di = convert_to_int(d, width)
                            if i == 0:
                                xref_type = di
                                # A zero-width type field means type 1
                                if xref_type == 0 and entry_sizes[0] == 0:
                                    xref_type = 1
                            elif i == 1:
                                if xref_type == 1:
                                    offset = di
                                elif xref_type == 2:
                                    objnum = di
                            elif i == 2:
                                if xref_type == 1:
                                    generation = di
                                elif xref_type == 2:
                                    obstr_idx = di
                        if xref_type == 1 and offset != 0:
                            setdefault((num, generation), offset)
                            add_offset(offset)
                        elif xref_type == 2:
                            object_streams.setdefault(objnum, []).append(
                                obstr_idx)
                        cnt += 1
                        num += 1

                self.load_stream_objects(object_streams)

                source.floc = end
                endit = source.multiple(2)
                if endit != ['endstream', 'endobj']:
                    source.exception('Expected endstream endobj')
                return obj
            else:
                source.exception('Expected xref stream')

        elif tok == 'xref':
            # plain xref table
            start = source.floc
            try:
                while 1:
                    tok = next()
                    if tok == 'trailer':
                        return read_trailer()
                    startobj = int(tok)
                    for objnum in range(startobj, startobj + int(next())):
                        offset = int(next())
                        generation = int(next())
                        inuse = next()
                        if inuse == 'n':
                            if offset != 0:
                                setdefault((objnum, generation), offset)
                                add_offset(offset)
                        elif inuse != 'f':
                            raise ValueError
            except Exception:
                # Any failure here just means the table is not strictly
                # formatted; fall through to the lenient parser below.
                pass
            try:
                # Table formatted incorrectly.
                # See if we can figure it out anyway.
                end = source.fdata.rindex('trailer', start)
                table = source.fdata[start:end].splitlines()
                for line in table:
                    tokens = line.split()
                    if len(tokens) == 2:
                        objnum = int(tokens[0])
                    elif len(tokens) == 3:
                        offset, generation, inuse = \
                            int(tokens[0]), int(tokens[1]), tokens[2]
                        if offset != 0 and inuse == 'n':
                            setdefault((objnum, generation), offset)
                            add_offset(offset)
                        objnum += 1
                    elif tokens:
                        log.error('Invalid line in xref table: %s' %
                                  repr(line))
                        raise ValueError
                log.warning('Badly formatted xref table')
                source.floc = end
                next()
            except Exception:
                source.floc = start
                source.exception('Invalid table format')

            return read_trailer()
        else:
            source.exception('Expected "xref" keyword or xref stream object')