Exemple #1
0
def get_pdf_page_array():
    """Build a list of four PDFPage fixtures with one content stream each.

    Every page is created with ``doc=None``, page ids 0 through 3 and the
    same ``MediaBox`` attribute dictionary.  Each page's ``contents`` is a
    single-element list holding a PDFStream whose ``data`` is the fixed
    sample text and whose object id / generation number both equal the
    page index.

    Returns:
        The list of constructed pages, in page-id order.
    """
    shared_attrs = {"MediaBox": [0, 0, 400, 300]}
    sample_text = "Department of Homeland Security"

    def build_page(index):
        # The page is created before its stream, and both receive the
        # very same attribute dictionary object.
        new_page = PDFPage(doc=None, pageid=index, attrs=shared_attrs)
        stream = PDFStream(shared_attrs, sample_text)
        stream.data = sample_text
        stream.set_objid(index, index)
        new_page.contents = [stream]
        return new_page

    return [build_page(index) for index in range(4)]
Exemple #2
0
 def do_keyword(self, pos, token):
   """Handle one keyword token found at position ``pos``.

   * ``KEYWORD_BI`` opens an 'inline' collection through ``start_type``.
   * ``KEYWORD_ID`` closes that collection, turns the collected objects
     into a dictionary (pairs of key, value), reads the inline data and
     pushes a PDFStream followed by ``KEYWORD_EI``.
   * Any other token is pushed back unchanged together with ``pos``.

   A PSTypeError raised while handling ``KEYWORD_ID`` is re-raised only
   when STRICT is true; otherwise it is dropped.
   """
   if token is self.KEYWORD_BI:
     # Beginning of an inline image within a content stream.
     self.start_type(pos, 'inline')
     return
   if token is not self.KEYWORD_ID:
     self.push((pos, token))
     return
   try:
     (_, collected) = self.end_type('inline')
     # Keys and values alternate, so an odd count cannot form a dict.
     if len(collected) % 2:
       raise PSTypeError('Invalid dictionary construct: %r' % collected)
     image_dict = {
       literal_name(key): value
       for (key, value) in choplist(2, collected)
     }
     (pos, payload) = self.get_inline_data(pos + len('ID '))
     self.push((pos, PDFStream(image_dict, payload)))
     self.push((pos, self.KEYWORD_EI))
   except PSTypeError:
     if STRICT:
       raise
 def test_encoding_DLIdentV_as_stream(self):
      """Check the cmap chosen for a stream Encoding named 'DLIdent-V'.

      The Encoding entry is a PDFStream whose CMapName is a plain string;
      the resulting font's ``cmap`` must be an IdentityCMap.
      """
      encoding = PDFStream({'CMapName': 'DLIdent-V'}, '')
      font = PDFCIDFont(None, {'Encoding': encoding})
      assert isinstance(font.cmap, IdentityCMap)
 def test_encoding_DLIdentH_as_PSLiteral_stream(self):
      """Check the cmap chosen for a stream Encoding named 'DLIdent-H'.

      The CMapName is supplied as a PSLiteral inside a PDFStream; the
      resulting font's ``cmap`` must be an IdentityCMap.
      """
      encoding = PDFStream({'CMapName': PSLiteral('DLIdent-H')}, '')
      font = PDFCIDFont(None, {'Encoding': encoding})
      assert isinstance(font.cmap, IdentityCMap)
 def test_cmapname_H(self):
      """Check the cmap chosen for a stream Encoding whose CMapName is 'H'.

      The CMapName is supplied as a PSLiteral inside a PDFStream; the
      resulting font's ``cmap`` must be a CMap instance.
      """
      encoding = PDFStream({'CMapName': PSLiteral('H')}, '')
      font = PDFCIDFont(None, {'Encoding': encoding})
      assert isinstance(font.cmap, CMap)
 def test_cmapname_onebyteidentityH(self):
      """Check the cmap chosen for a CMapName of 'OneByteIdentityH'.

      The CMapName is supplied as a PSLiteral inside a PDFStream; the
      resulting font's ``cmap`` must be an IdentityCMapByte.
      """
      encoding = PDFStream({'CMapName': PSLiteral('OneByteIdentityH')}, '')
      font = PDFCIDFont(None, {'Encoding': encoding})
      assert isinstance(font.cmap, IdentityCMapByte)
Exemple #7
0
    def do_keyword(self, pos, token):
        """Handle one keyword token found at position ``pos``.

        * ``KEYWORD_XREF`` / ``KEYWORD_STARTXREF``: pop one item and hand
          it to ``add_results``.
        * ``KEYWORD_ENDOBJ``: pop four items and hand them to
          ``add_results``.
        * ``KEYWORD_R``: pop an object id and a generation number and push
          a PDFObjRef built from them; a PSSyntaxError is ignored.
        * ``KEYWORD_STREAM``: pop the stream dictionary, read the stream
          body from ``self.fp`` and push a PDFStream.
        * anything else: push the token back unchanged with ``pos``.

        Raises:
            PDFSyntaxError: only when STRICT is true, if the dictionary
                has no 'Length' entry or the input ends before the
                stream is complete.
        """
        if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
            self.add_results(*self.pop(1))
        elif token is self.KEYWORD_ENDOBJ:
            self.add_results(*self.pop(4))
        elif token is self.KEYWORD_R:
            # Reference to an indirect object.
            try:
                ((_, raw_id), (_, raw_gen)) = self.pop(2)
                object_id = int(raw_id)
                generation = int(raw_gen)
                reference = PDFObjRef(self.doc, object_id, generation)
                self.push((pos, reference))
            except PSSyntaxError:
                pass
        elif token is self.KEYWORD_STREAM:
            # Stream object: the dictionary precedes the keyword.
            ((_, stream_dict), ) = self.pop(1)
            stream_dict = dict_value(stream_dict)
            try:
                length = int_value(stream_dict['Length'])
            except KeyError:
                if STRICT:
                    raise PDFSyntaxError('/Length is undefined: %r' % stream_dict)
                length = 0
            self.seek(pos)
            try:
                (_, header) = self.nextline()  # 'stream'
            except PSEOF:
                if STRICT:
                    raise PDFSyntaxError('Unexpected EOF')
                return
            # The body starts right after the line holding the keyword.
            pos += len(header)
            self.fp.seek(pos)
            payload = self.fp.read(length)
            self.seek(pos + length)
            # Keep appending lines until one contains 'endstream'; the
            # text before that marker still belongs to the body.
            while True:
                try:
                    (_, chunk) = self.nextline()
                except PSEOF:
                    if STRICT:
                        raise PDFSyntaxError('Unexpected EOF')
                    break
                marker_at = chunk.find('endstream')
                if marker_at >= 0:
                    length += marker_at
                    payload += chunk[:marker_at]
                    break
                length += len(chunk)
                payload += chunk
            self.seek(pos + length)
            if self.debug >= 1:
                print >>stderr, ('Stream: pos=%d, objlen=%d, dic=%r, data=%r...' %
                                 (pos, length, stream_dict, payload[:10]))
            stream = PDFStream(stream_dict, payload, self.doc.decipher)
            self.push((pos, stream))
        else:
            # others
            self.push((pos, token))
        return