Exemple #1
0
 def testFindEqclassesCircularReferences(self):
   """Checks FindEqclasses on Pages/Page objects that reference each other.

   Objects 4/9 and 5/10 form two parallel /Parent <-> /Kids cycles. With
   do_remove_unused=True and do_renumber=True the expected output has a
   single Pages object (1) and a single Page object (2). References to the
   object numbers missing from the input (1, 2 and 3) are expected to come
   out as null.
   """
   doc = pdfsizeopt.PdfData()
   # The trailer has to mention every object, otherwise the objects would be
   # discarded as unused.
   doc.trailer = pdfsizeopt.PdfObj(
       '0 0 obj<<4 0 R 5 0 R 9 0 R 10 0 R>>endobj')
   # Both Page objects share this shape; only the object number in the header
   # and the /Parent target differ.
   page_fmt = (
       '%d 0 obj<</Type/Page/MediaBox[0 0 419 534]/CropBox[0 0 419 534]'
       '/Parent %d 0 R/Resources<</XObject<</S 2 0 R>>/ProcSet[/PDF/ImageB]>>'
       '/Contents 3 0 R>>endobj')
   sources = (
       (4, '0 0 obj<</Parent  1 0 R/Type/Pages/Kids[9 0 R]/Count 1>>endobj'),
       (5, '0 0 obj<</Parent 1  0 R/Type/Pages/Kids[10 0 R]/Count 1>>endobj'),
       (9, page_fmt % (0, 4)),
       (10, page_fmt % (10, 5)),
   )
   for num, source in sources:
     doc.objs[num] = pdfsizeopt.PdfObj(source)
   doc.objs['trailer'] = doc.trailer
   merged = pdfsizeopt.PdfData.FindEqclasses(
       doc.objs, do_remove_unused=True, do_renumber=True)
   del doc.objs['trailer']
   # Reduce each resulting object to a comparable (head, stream) pair.
   for num, obj in list(merged.items()):
     merged[num] = (obj.head, obj.stream)
   expected = {
       1: ('<</Parent null/Type/Pages/Kids[2 0 R]/Count 1>>', None),
       2: ('<</Type/Page/MediaBox[0 0 419 534]/CropBox[0 0 419 534]'
           '/Parent 1 0 R/Resources<</XObject<</S null>>'
           '/ProcSet[/PDF/ImageB]>>/Contents null>>', None),
       'trailer': ('<<1 0 R 1 0 R 2 0 R 2 0 R>>', None),
   }
   self.assertEqual(expected, merged)
Exemple #2
0
 def testFindEqclassesAllEquivalentAndUndefined(self):
   """Checks merging of equivalent objects whose /U targets do not exist.

   Objects 1-4 point at each other through /P and at the nonexistent objects
   6-9 through /U. The expected result is a single object 1 that refers to
   itself, with /U replaced by null.
   """
   doc = pdfsizeopt.PdfData()
   doc.trailer = pdfsizeopt.PdfObj('0 0 obj<<>>endobj')
   source_fmt = '0 0 obj<</S(q)/P %d 0 R /U %d 0 R>>endobj'
   # (object number, /P target, /U target); none of the /U targets is defined.
   for num, p_ref, u_ref in ((1, 2, 6), (2, 1, 7), (3, 4, 8), (4, 3, 9)):
     doc.objs[num] = pdfsizeopt.PdfObj(source_fmt % (p_ref, u_ref))
   merged = pdfsizeopt.PdfData.FindEqclasses(doc.objs)
   # Reduce each resulting object to a comparable (head, stream) pair.
   for num, obj in list(merged.items()):
     merged[num] = (obj.head, obj.stream)
   expected = {1: ('<</S(q)/P 1 0 R/U null>>', None)}
   self.assertEqual(expected, merged)
Exemple #3
0
 def testFindEqclassesAllEquivalent(self):
   """Checks that four mutually equivalent objects collapse into one.

   Objects 5/6 and 3/4 reference each other through /P and differ only in
   whitespace before the closing `>>'. The expected result is the single
   object 3, referring to itself.
   """
   doc = pdfsizeopt.PdfData()
   doc.trailer = pdfsizeopt.PdfObj('0 0 obj<<>>endobj')
   sources = (
       (5, '0 0 obj<</S(q)/P 6 0 R>>endobj'),
       (6, '0 0 obj<</S(q)/P 5 0 R >>endobj'),
       (3, '0 0 obj<</S(q)/P 4 0 R  >>endobj'),
       (4, '0 0 obj<</S(q)/P 3 0 R   >>endobj'),
   )
   for num, source in sources:
     doc.objs[num] = pdfsizeopt.PdfObj(source)
   merged = pdfsizeopt.PdfData.FindEqclasses(doc.objs)
   # Reduce each resulting object to a comparable (head, stream) pair.
   for num, obj in list(merged.items()):
     merged[num] = (obj.head, obj.stream)
   expected = {3: ('<</S(q)/P 3 0 R>>', None)}
   self.assertEqual(expected, merged)
Exemple #4
0
 def testFindEqclassesString(self):
   """Checks how FindEqclasses is expected to rewrite string values.

   The single object holds an empty literal string, an empty hex string, a
   literal string and an odd-length hex string. In the expected output the
   hex strings appear in parenthesized form (`<>' as `()', `<3a3A4>' as
   `(::@)') and object 3 is renumbered to 1.
   """
   doc = pdfsizeopt.PdfData()
   doc.trailer = pdfsizeopt.PdfObj('0 0 obj<</A[3 0 R]>>endobj')
   doc.objs[3] = pdfsizeopt.PdfObj('0 0 obj<</A()/B<>/C(:)/D<3a3A4>>>endobj')
   doc.objs['trailer'] = doc.trailer
   merged = pdfsizeopt.PdfData.FindEqclasses(
       doc.objs, do_remove_unused=True, do_renumber=True)
   # Reduce each resulting object to a comparable (head, stream) pair.
   for num, obj in list(merged.items()):
     merged[num] = (obj.head, obj.stream)
   expected = {
       'trailer': ('<</A[1 0 R]>>', None),
       1: ('<</A()/B()/C(:)/D(::@)>>', None),
   }
   self.assertEqual(expected, merged)
Exemple #5
0
 def testFindEqclassesTwoGroupsByOrder(self):
   """Checks that objects with different dict key order stay separate.

   Objects 1 and 3 list /S before /P, objects 2 and 4 list /P before /S
   (object 4 spells the string as the hex form `<71>'). The expected result
   keeps exactly two objects, 1 and 2, each with its original key order.
   """
   doc = pdfsizeopt.PdfData()
   doc.trailer = pdfsizeopt.PdfObj('0 0 obj<<>>endobj')
   sources = (
       (1, '0 0 obj<</S(q)/P 2 0 R>>endobj'),
       (2, '0 0 obj<</P 1 0 R/S(q)>>endobj'),
       (3, '0 0 obj<</S(q)/P 4 0 R>>endobj'),
       (4, '0 0 obj<</P 3 0 R  /S<71>>>endobj'),
   )
   for num, source in sources:
     doc.objs[num] = pdfsizeopt.PdfObj(source)
   merged = pdfsizeopt.PdfData.FindEqclasses(doc.objs)
   # Reduce each resulting object to a comparable (head, stream) pair.
   for num, obj in list(merged.items()):
     merged[num] = (obj.head, obj.stream)
   expected = {
       1: ('<</S(q)/P 2 0 R>>', None),
       2: ('<</P 1 0 R/S(q)>>', None),
   }
   self.assertEqual(expected, merged)
Exemple #6
0
 def testFindEqclassesAllDifferentBecauseOfStream(self):
   """Checks that differing streams keep otherwise similar objects apart.

   Objects 2 and 4 carry the streams 'foo' and 'fox'. All four objects are
   expected to survive, with the whitespace before `>>' removed from their
   heads and their streams unchanged.
   """
   doc = pdfsizeopt.PdfData()
   doc.trailer = pdfsizeopt.PdfObj('0 0 obj<<>>endobj')
   # (object number, source, stream data or None).
   specs = (
       (1, '0 0 obj<</S(q)/P 2 0 R>>endobj', None),
       (2, '0 0 obj<</S(q)/P 1 0 R >>endobj', 'foo'),
       (3, '0 0 obj<</S(q)/P 4 0 R  >>endobj', None),
       (4, '0 0 obj<</S(q)/P 3 0 R   >>endobj', 'fox'),
   )
   for num, source, stream in specs:
     doc.objs[num] = pdfsizeopt.PdfObj(source)
     if stream is not None:
       doc.objs[num].stream = stream
   merged = pdfsizeopt.PdfData.FindEqclasses(doc.objs)
   # Reduce each resulting object to a comparable (head, stream) pair.
   for num, obj in list(merged.items()):
     merged[num] = (obj.head, obj.stream)
   expected = {
       1: ('<</S(q)/P 2 0 R>>', None),
       2: ('<</S(q)/P 1 0 R>>', 'foo'),
       3: ('<</S(q)/P 4 0 R>>', None),
       4: ('<</S(q)/P 3 0 R>>', 'fox'),
   }
   self.assertEqual(expected, merged)
Exemple #7
0
 def testPdfObjGetSet(self):
   """Walks PdfObj.Get, PdfObj.Set and the head property step by step.

   After each step the private _head and _cache attributes are asserted, so
   the order of the statements below matters.
   """
   # Parsing is expected to keep only the dict in _head: the whitespace before
   # `endobj' and the text after it do not show up.
   obj = pdfsizeopt.PdfObj('42 0 obj<</Foo(hi)>>\t\f\rendobj junk stream\r\n')
   self.assertEqual('<</Foo(hi)>>', obj._head)
   self.assertEqual(None, obj._cache)
   # Reading head or looking up 'Bar' is expected to leave _cache unset.
   self.assertEqual('<</Foo(hi)>>', obj.head)
   self.assertEqual(None, obj._cache)
   self.assertEqual(None, obj.Get('Bar'))
   self.assertEqual(None, obj._cache)
   # Looking up 'Fo' finds nothing, but is expected to fill _cache; the string
   # value appears there in hex form.
   # NOTE(review): presumably the cache gets built because 'Fo' is a prefix of
   # the key 'Foo' present in the head -- confirm against PdfObj.Get.
   self.assertEqual(None, obj.Get('Fo'))
   self.assertEqual({'Foo': '<6869>'}, obj._cache)
   self.assertEqual('<6869>', obj.Get('Foo'))
   self.assertEqual('<</Foo(hi)>>', obj._head)
   # Setting a value equal to the current one (surrounding whitespace aside,
   # in hex or in literal form) is expected to leave _head untouched.
   obj.Set('Foo', ' \t<6869>\f \r ')
   self.assertEqual('<6869>', obj.Get('Foo'))
   self.assertEqual({'Foo': '<6869>'}, obj._cache)
   self.assertEqual('<</Foo(hi)>>', obj._head)
   obj.Set('Foo', ' \t(hi)\f \r ')
   self.assertEqual('<6869>', obj.Get('Foo'))
   self.assertEqual({'Foo': '<6869>'}, obj._cache)
   self.assertEqual('<</Foo(hi)>>', obj._head)  # still valid
   # Setting a different value is expected to reset _head to None.
   obj.Set('Foo', '(*)')
   self.assertEqual('<2a>', obj.Get('Foo'))
   self.assertEqual(None, obj._head)
   self.assertEqual({'Foo': '<2a>'}, obj._cache)
   # The numeric string '0042' is expected to be cached as the int 42. Reading
   # the head property afterwards is expected to rebuild _head, with /Bar
   # listed before /Foo.
   obj.Set('Bar', '0042')
   self.assertEqual({'Foo': '<2a>', 'Bar': 42}, obj._cache)
   self.assertEqual(None, obj._head)
   self.assertEqual('<</Bar 42/Foo(*)>>', obj.head)
   self.assertEqual('<</Bar 42/Foo(*)>>', obj._head)
   # The string 'null' is expected to be stored like any other value.
   obj.Set('Bar', 'null')
   self.assertEqual({'Foo': '<2a>', 'Bar': 'null'}, obj._cache)
   self.assertEqual(None, obj._head)
   self.assertEqual('<</Bar null/Foo(*)>>', obj.head)
   self.assertEqual('<</Bar null/Foo(*)>>', obj._head)
   # Setting None with do_keep_null=True is expected to change neither the
   # cached 'null' entry nor _head.
   obj.Set('Bar', None, do_keep_null=True)
   self.assertEqual({'Foo': '<2a>', 'Bar': 'null'}, obj._cache)
   self.assertEqual('<</Bar null/Foo(*)>>', obj._head)
   # Setting None without do_keep_null is expected to remove the key and reset
   # _head.
   obj.Set('Bar', None)
   self.assertEqual({'Foo': '<2a>'}, obj._cache)
   self.assertEqual(None, obj._head)
   # Reading size is expected to rebuild _head as well.
   # NOTE(review): the origin of the constant 40 added to the head length is
   # not visible here -- confirm against PdfObj.size.
   self.assertEqual(len('<</Foo(*)>>') + 40, obj.size)
   self.assertEqual('<</Foo(*)>>', obj._head)
   self.assertEqual('<</Foo(*)>>', obj.head)
   # Assigning a head identical to the current one is expected to keep the
   # cache.
   obj.head = '<</Foo(*)>>'
   self.assertEqual({'Foo': '<2a>'}, obj._cache)
   obj.head = '<</Foo<2a>>>'  # invalidates the cache
   self.assertEqual(None, obj._cache)
   # Looking up 'Food' is expected to leave the cache unset; looking up 'Foo'
   # is expected to fill it again.
   self.assertEqual(None, obj.Get('Food'))
   self.assertEqual(None, obj._cache)
   self.assertEqual('<2a>', obj.Get('Foo'))
   self.assertEqual({'Foo': '<2a>'}, obj._cache)
Exemple #8
0
 def NewObj(head, stream=None, do_compress=False):
   """Builds a pdfsizeopt.PdfObj from a head and an optional stream.

   Args:
     head: Head string for the object. A false value is replaced by '' when
       there is no stream, and by '<<>>' when there is one.
     stream: Stream data as a str, or None for an object without a stream.
     do_compress: If true, the stream is stored with SetStreamAndCompress;
       otherwise /Length is set to len(stream) and the stream is stored
       as is.
   Returns:
     The new PdfObj.
   Raises:
     TypeError: If stream is neither None nor a str.
   """
   new_obj = pdfsizeopt.PdfObj(None)
   if stream is None:
     new_obj.head = head or ''
     return new_obj
   if not isinstance(stream, str):
     raise TypeError
   new_obj.head = head or '<<>>'
   if not do_compress:
     new_obj.Set('Length', len(stream))
     new_obj.stream = stream
   else:
     new_obj.SetStreamAndCompress(stream)
   return new_obj
Exemple #9
0
 def testFindEqclassesTwoGroupsWithTrailerRenumber(self):
   """Checks merging, unused-object removal and renumbering together.

   The trailer references objects 3-6 only. Objects 10-12 are not referenced
   from the trailer and are expected to be absent from the result. The /P
   objects (5, 3) and the /Q objects (6, 4) are each expected to end up as
   one object, renumbered to 2 and 1 respectively.
   """
   doc = pdfsizeopt.PdfData()
   doc.trailer = pdfsizeopt.PdfObj(
       '0 0 obj<</A[3 0 R 4 0 R 5 0 R 6 0 R 4 0 R]>>endobj')
   sources = (
       (5, '0 0 obj<</S(q)/P 6 0 R>>endobj'),
       (6, '0 0 obj<</S(q)/Q 5 0 R >>endobj'),
       (3, '0 0 obj<</S(q)/P 4 0 R  >>endobj'),
       (4, '0 0 obj<</S(q)/Q 3 0 R   >>endobj'),
       (10, '0 0 obj[11 0 R]endobj'),
       (11, '0 0 obj[10 0 R]endobj'),
       (12, '0 0 obj[11 0 R]endobj'),
   )
   for num, source in sources:
     doc.objs[num] = pdfsizeopt.PdfObj(source)
   doc.objs[12].stream = 'blah'
   doc.objs['trailer'] = doc.trailer
   merged = pdfsizeopt.PdfData.FindEqclasses(
       doc.objs, do_remove_unused=True, do_renumber=True)
   del doc.objs['trailer']
   # Reduce each resulting object to a comparable (head, stream) pair.
   for num, obj in list(merged.items()):
     merged[num] = (obj.head, obj.stream)
   expected = {
       'trailer': ('<</A[2 0 R 1 0 R 2 0 R 1 0 R 1 0 R]>>', None),
       2: ('<</S(q)/P 1 0 R>>', None),
       1: ('<</S(q)/Q 2 0 R>>', None),
   }
   self.assertEqual(expected, merged)
Exemple #10
0
 def testFindEqclassesTwoGroupsWithTrailer(self):
   """Checks merging with FindEqclasses called with its default options.

   The /P objects (5, 3) are expected to merge into object 3 and the /Q
   objects (6, 4) into object 4, with the trailer rewritten accordingly.
   Objects 10 and 12, which the trailer does not reference, are expected to
   remain in the result; object 11 is expected to be merged into 10, while
   12 stays separate with its 'blah' stream.
   """
   PdfObj = pdfsizeopt.PdfObj
   doc = pdfsizeopt.PdfData()
   doc.trailer = PdfObj(
       '0 0 obj<</A[3 0 R 4 0 R 5 0 R 6 0 R 3 0 R]>>endobj')
   objs = doc.objs
   objs[5] = PdfObj('0 0 obj<</S(q)/P 6 0 R>>endobj')
   objs[6] = PdfObj('0 0 obj<</S(q)/Q 5 0 R >>endobj')
   objs[3] = PdfObj('0 0 obj<</S(q)/P 4 0 R  >>endobj')
   objs[4] = PdfObj('0 0 obj<</S(q)/Q 3 0 R   >>endobj')
   objs[10] = PdfObj('0 0 obj[11 0 R]endobj')
   objs[11] = PdfObj('0 0 obj[10 0 R]endobj')
   objs[12] = PdfObj('0 0 obj[11 0 R]endobj')
   objs[12].stream = 'blah'
   objs['trailer'] = doc.trailer
   merged = pdfsizeopt.PdfData.FindEqclasses(objs)
   del objs['trailer']
   # Reduce each resulting object to a comparable (head, stream) pair.
   for num, obj in list(merged.items()):
     merged[num] = (obj.head, obj.stream)
   expected = {
       'trailer': ('<</A[3 0 R 4 0 R 3 0 R 4 0 R 3 0 R]>>', None),
       10: ('[10 0 R]', None),
       12: ('[10 0 R]', 'blah'),
       3: ('<</S(q)/P 4 0 R>>', None),
       4: ('<</S(q)/Q 3 0 R>>', None),
   }
   self.assertEqual(expected, merged)
Exemple #11
0
  def testPdfObjParse(self):
    """Checks how the PdfObj constructor parses `X Y obj ... endobj' text.

    Covers stream extraction driven by /Length, the start= and end_ofs_out=
    arguments, and the handling of `endobj' look-alikes inside strings and
    comments.
    """
    # With /Length 3 the stream is expected to be exactly 'ABC', without the
    # space that precedes `endstream'.
    obj = pdfsizeopt.PdfObj(
        '42 0 obj<</Length  3>>stream\r\nABC endstream endobj')
    self.assertEqual('<</Length  3>>', obj.head)
    self.assertEqual('ABC', obj.stream)
    # With /Length 4 that space is part of the stream; a /Length of 99 is
    # expected to raise PdfTokenParseError.
    obj = pdfsizeopt.PdfObj(
        '42 0 obj<</Length  4>>stream\r\nABC endstream endobj')
    self.assertRaises(
        pdfsizeopt.PdfTokenParseError, pdfsizeopt.PdfObj,
        '42 0 obj<</Length 99>>stream\r\nABC endstream endobj')
    self.assertEqual('<</Length  4>>', obj.head)
    self.assertEqual('ABC ', obj.stream)
    # Without a `stream' keyword the stream attribute is expected to be None.
    obj = pdfsizeopt.PdfObj(
        '42 0 obj<</Length  4>>endobj')
    self.assertEqual('<</Length  4>>', obj.head)
    self.assertEqual(None, obj.stream)
    # The `/Length 99' nested in the array value of /T is expected to be
    # ignored in favor of the dict's own /Length 3.
    obj = pdfsizeopt.PdfObj(
        '42 0 obj<</T[/Length 99]/Length  3>>stream\r\nABC endstream endobj')
    self.assertEqual('ABC', obj.stream)
    # Here a bare '\n' (instead of '\r\n') follows the `stream' keyword.
    obj = pdfsizeopt.PdfObj(
        '42 0 obj<</T()/Length  3>>stream\nABC endstream endobj')
    self.assertEqual('ABC', obj.stream)
    # s contains `>>\nendobj\n' inside a parenthesized string and t contains
    # `>>endobj' inside a %-comment; neither is expected to end the object.
    s = '41 0 obj<</T(>>\nendobj\n)/Length  3>>stream\nABD endstream endobj'
    t = '42 0 obj<</T 5%>>endobj\n/Length  3>>stream\nABE endstream endobj'
    # end_ofs_out is expected to receive a single item, the offset at which
    # parsing ended.
    end_ofs_out = []
    obj = pdfsizeopt.PdfObj(s, end_ofs_out=end_ofs_out)
    self.assertEqual([len(s)], end_ofs_out)
    self.assertEqual('ABD', obj.stream)
    # With trailing data after the object, the reported end offset is expected
    # to be len(t) + 1.
    end_ofs_out = []
    obj = pdfsizeopt.PdfObj(t + '\r\n\tANYTHING', end_ofs_out=end_ofs_out)
    self.assertEqual([len(t) + 1], end_ofs_out)
    # start= selects where parsing begins: here it is the offset of t within
    # the joined string.
    end_ofs_out = []
    obj = pdfsizeopt.PdfObj(
        '%s\n%s' % (s, t), start=len(s) + 1, end_ofs_out=end_ofs_out)
    self.assertEqual('ABE', obj.stream)
    self.assertEqual([len(s) + 1 + len(t)], end_ofs_out)
    # Exception because start points to '\n', not an `X Y obj'.
    self.assertRaises(
        pdfsizeopt.PdfTokenParseError,
        pdfsizeopt.PdfObj, '%s\n%s' % (s, t), start=len(s))

    # The end offset is expected to be len(s), i.e. to include the space after
    # `endobj' but nothing of the following `23 0 obj'.
    s = '22 0 obj<</Producer(A)/CreationDate(B)/Creator(C)>>\nendobj '
    t = '23 0 obj'
    end_ofs_out = []
    obj = pdfsizeopt.PdfObj(s + t, end_ofs_out=end_ofs_out)
    self.assertEqual('<</Producer(A)/CreationDate(B)/Creator(C)>>', obj.head)
    self.assertEqual([len(s)], end_ofs_out)
    obj = pdfsizeopt.PdfObj(
        '42 0 obj[/Foo%]endobj\n42  43\t]\nendobj')
    # Parses the comment properly, but doesn't replace it with the non-comment
    # version.
    self.assertEqual('[/Foo%]endobj\n42  43\t]', obj.head)
    obj = pdfsizeopt.PdfObj('42 0 obj%hello\r  \t\f%more\n/Foo%bello\nendobj')
    # Leading comments are removed, but trailing comments aren't.
    self.assertEqual('/Foo%bello', obj.head)