コード例 #1
0
ファイル: parser_tst.py プロジェクト: ra2003/pdf
    def testNormalParser(self):
        '''
            Check the tag of the node returned by parser.parse for the
            'object' and 'indirect' start symbols on well-formed input, and
            check that malformed input makes parser.parse raise.
        '''
        # (expected tag, source text) pairs for the 'object' start symbol
        object_tags_tbl = [
            ('array', '[ 1 (string) <414141> null ]'),
            ('number', '1'),
            ('number', '1.0'),
            ('number', '-1.0'),
            ('string', '(string)'),
            ('string', '(st(ri)ng)'),
            ('string', '(st(ri)n\\023g)'),
            ('string', '<41414141>'),
            ('dictionary',
             '<< /entry1 1 /entry2 (string) /entry3 <414141> /entry4 null >>'),
            ('dictionary',
             '<< /entry1 1 /entry2#01 (string) /en#45try3 <414141> /entry4 null >>'
             ),
        ]

        # inputs that must be rejected when parsed as a single object
        object_raises = [
            '1 0', '(asd', '1.0.1', 'asda>', '<asd>', '((asda)', 'obj',
            'qfdgfsda', '<< /aa >>'
        ]

        # (expected tag, source text) pairs for the 'indirect' start symbol
        indirect_object_tags_tbl = [
            ('indirect_object', '1 0 obj\n(string)\nendobj  \n'),
            ('indirect_object', '1 0 obj\n<41414141>\nendobj\n'),
            ('indirect_object', '1 0 obj\n[1 (string) <414141> null]\nendobj'),
        ]

        # inputs that must be rejected when parsed as an indirect object
        indirect_object_raises = [
            '1 0 obj\n<</key 1>>\nendobj\n2 0 obj\n<</key 2\nendobj',
            '1 0 \nobj\n<</key 1>>\nendobj\n',
        ]

        #test invalid objects
        # The parse arguments are handed to assertRaises one by one so that
        # it calls parser.parse('object', obj_str); wrapping them in a tuple
        # would call parse with a single argument instead.
        for obj_str in object_raises:
            self.assertRaises(Exception, parser.parse, 'object', obj_str)

        #test tags for an indirect object parser
        for tag, obj_str in indirect_object_tags_tbl:
            self.assertEqual(tag, parser.parse('indirect', obj_str + "\n").tag)

        #test invalid indirect_objects
        for obj_str in indirect_object_raises:
            self.assertRaises(Exception, parser.parse, 'indirect', obj_str)

        #test tags for a simple object parser
        for tag, obj_str in object_tags_tbl:
            self.assertEqual(tag, parser.parse('object', obj_str).tag)
コード例 #2
0
    def expandObjStm(self):
        '''
            Parse the ObjStm payload and build one indirect object for each
            object stored in it.

            The first `First` characters of the payload are read as
            whitespace-separated integer pairs (object number, offset); each
            offset is relative to `First`. The text between two consecutive
            offsets (or between the last offset and the end of the payload)
            is parsed as a single object.

            Returns a list of PDF.indirect_object, ordered by position in
            the payload, each with id set to (object number, 0).
        '''
        from opaflib.parser import parse
        assert not self.isFiltered(), "ObjStm should not be compressed at this point"
        assert self.dictionary.has_key('N'), "N is mandatory in ObjStm dictionary"
        assert self.dictionary.has_key('First'), "First is mandatory in ObjStm dictionary"

        dictionary = self.dictionary
        data = self.data.value
        first = dictionary["First"].value
        header = [int(x) for x in data[:first].split()]
        assert len(header) % 2 == 0, "Wrong number of integer in the ObjStm begining"
        # absolute offset inside data -> object number
        pointers = {header[i + 1] + first: header[i]
                    for i in range(0, len(header), 2)}
        # list(pointers) instead of pointers.keys() so the concatenation also
        # works when keys() returns a view rather than a list.
        positions = sorted(list(pointers) + [len(data)])

        parsed_objects = []
        # Walk consecutive (start, end) offsets; len(data) closes the last one.
        for start, end in zip(positions, positions[1:]):
            obj_id = (pointers[start], 0)
            logger.info("Adding new object %s from objectstream" % repr(obj_id))
            # A trailing space is appended to the slice handed to the parser.
            io = PDF.indirect_object(parse('object', data[start:end] + " "))
            io.id = obj_id
            parsed_objects.append(io)
        return parsed_objects
コード例 #3
0
ファイル: __init__.py プロジェクト: feliam/opaf
def expandObjStm(iostream):
    '''
        Parse the ObjStm structure and add all the new indirect objects as
        children of the ObjStm node.

        iostream must have exactly two children: iostream[0] is converted
        with xmlToPy into the stream dictionary and iostream[1] carries the
        stream payload. The first `First` characters of the payload are read
        as whitespace-separated integer pairs (object number, offset
        relative to `First`).

        A new 'object_stream' element containing one 'indirect_object' per
        embedded object is appended to iostream. Nothing is returned.
    '''
    dictionary = xmlToPy(iostream[0])
    assert 'Filter' not in dictionary.keys(), "ObjStm should not be compressed at this point"
    assert 'N' in dictionary.keys(), "N is mandatory in ObjStm dictionary"
    assert 'First' in dictionary.keys(), "First is mandatory in ObjStm dictionary"
    assert len(iostream) == 2, "It is already expanded, or SITW!"
    data = payload(iostream[1])
    first = dictionary["First"]
    header = [int(x) for x in data[:first].split()]
    assert len(header) % 2 == 0, "Wrong number of integer in the ObjStm begining"
    # absolute offset inside data -> object number
    pointers = {header[i + 1] + first: header[i]
                for i in range(0, len(header), 2)}
    # sorted() returns a real list whether or not keys() is a view;
    # len(data) is appended as the end boundary of the last object.
    positions = sorted(pointers)
    positions.append(len(data))

    # Every generated element reuses the lexical span of the stream data node.
    lexstart = iostream[1].get('lexstart')
    lexend = iostream[1].get('lexend')
    object_stream = etree.Element('object_stream', lexstart=lexstart,
                                                   lexend=lexend,
                                                   payload="")

    for start, end in zip(positions, positions[1:]):
        obj_id = (pointers[start], 0)
        logger.info("Adding new object %s from objectstream %s"%(obj_id,payload(iostream)))
        # A trailing space is appended to the slice handed to the parser.
        xmlobject = parse('object', data[start:end]+" ")
        io = etree.Element('indirect_object', lexstart=lexstart,
                                              lexend=lexend)
        setpayload(io,repr(obj_id))

        io.append(xmlobject)
        object_stream.append(io)
    iostream.append(object_stream)
コード例 #4
0
ファイル: parser_tst.py プロジェクト: feliam/opaf
    def testNormalParser(self):
        """
        Check the tag of the node returned by parser.parse for the "object"
        and "indirect" start symbols on well-formed input, and check that
        malformed input makes parser.parse raise.
        """
        # (expected tag, source text) pairs for the "object" start symbol
        object_tags_tbl = [
            ("array", "[ 1 (string) <414141> null ]"),
            ("number", "1"),
            ("number", "1.0"),
            ("number", "-1.0"),
            ("string", "(string)"),
            ("string", "(st(ri)ng)"),
            ("string", "(st(ri)n\\023g)"),
            ("string", "<41414141>"),
            ("dictionary", "<< /entry1 1 /entry2 (string) /entry3 <414141> /entry4 null >>"),
            ("dictionary", "<< /entry1 1 /entry2#01 (string) /en#45try3 <414141> /entry4 null >>"),
        ]

        # inputs that must be rejected when parsed as a single object
        object_raises = ["1 0", "(asd", "1.0.1", "asda>", "<asd>", "((asda)", "obj", "qfdgfsda", "<< /aa >>"]

        # (expected tag, source text) pairs for the "indirect" start symbol
        indirect_object_tags_tbl = [
            ("indirect_object", "1 0 obj\n(string)\nendobj  \n"),
            ("indirect_object", "1 0 obj\n<41414141>\nendobj\n"),
            ("indirect_object", "1 0 obj\n[1 (string) <414141> null]\nendobj"),
        ]

        # inputs that must be rejected when parsed as an indirect object
        indirect_object_raises = [
            "1 0 obj\n<</key 1>>\nendobj\n2 0 obj\n<</key 2\nendobj",
            "1 0 \nobj\n<</key 1>>\nendobj\n",
        ]

        # test invalid objects
        # The parse arguments are handed to assertRaises one by one so that
        # it calls parser.parse("object", obj_str); wrapping them in a tuple
        # would call parse with a single argument instead.
        for obj_str in object_raises:
            self.assertRaises(Exception, parser.parse, "object", obj_str)

        # test tags for an indirect object parser
        for tag, obj_str in indirect_object_tags_tbl:
            self.assertEqual(tag, parser.parse("indirect", obj_str + "\n").tag)

        # test invalid indirect_objects
        for obj_str in indirect_object_raises:
            self.assertRaises(Exception, parser.parse, "indirect", obj_str)

        # test tags for a simple object parser
        for tag, obj_str in object_tags_tbl:
            self.assertEqual(tag, parser.parse("object", obj_str).tag)
コード例 #5
0
def expandObjStm(iostream):
    '''
        Parse the ObjStm structure and add all the new indirect objects as
        children of the ObjStm node.

        iostream must have exactly two children: iostream[0] is converted
        with xmlToPy into the stream dictionary and iostream[1] carries the
        stream payload. The first `First` characters of the payload are read
        as whitespace-separated integer pairs (object number, offset
        relative to `First`).

        A new 'object_stream' element containing one 'indirect_object' per
        embedded object is appended to iostream. Nothing is returned.
    '''
    dictionary = xmlToPy(iostream[0])
    assert 'Filter' not in dictionary.keys(
    ), "ObjStm should not be compressed at this point"
    assert 'N' in dictionary.keys(), "N is mandatory in ObjStm dictionary"
    assert 'First' in dictionary.keys(
    ), "First is mandatory in ObjStm dictionary"
    assert len(iostream) == 2, "It is already expanded, or SITW!"
    data = payload(iostream[1])
    first = dictionary["First"]
    header = [int(x) for x in data[:first].split()]
    assert len(
        header) % 2 == 0, "Wrong number of integer in the ObjStm begining"
    # absolute offset inside data -> object number
    pointers = {header[i + 1] + first: header[i]
                for i in range(0, len(header), 2)}
    # sorted() returns a real list whether or not keys() is a view;
    # len(data) is appended as the end boundary of the last object.
    positions = sorted(pointers)
    positions.append(len(data))

    # Every generated element reuses the lexical span of the stream data node.
    lexstart = iostream[1].get('lexstart')
    lexend = iostream[1].get('lexend')
    object_stream = etree.Element('object_stream',
                                  lexstart=lexstart,
                                  lexend=lexend,
                                  payload="")

    for start, end in zip(positions, positions[1:]):
        obj_id = (pointers[start], 0)
        logger.info("Adding new object %s from objectstream %s" %
                    (obj_id, payload(iostream)))
        # A trailing space is appended to the slice handed to the parser.
        xmlobject = parse('object', data[start:end] + " ")
        io = etree.Element('indirect_object',
                           lexstart=lexstart,
                           lexend=lexend)
        setpayload(io, repr(obj_id))

        io.append(xmlobject)
        object_stream.append(io)
    iostream.append(object_stream)