Esempio n. 1
0
def p_dictionary_entry_list(p):
    ''' dictionary_entry_list : dictionary_entry_list NAME object
                              |  '''
    # The docstring above is the grammar production for this PLY-style rule
    # (presumably read by the parser generator), so it must stay verbatim.
    #
    # Result (stored in p[0]): a list of 'entry' nodes, one per NAME/object
    # pair reduced so far.

    # Empty alternative: p holds only the result slot, so start a new list.
    if len(p) == 1:
        p[0] = []
        return

    # Recursive alternative: p[1] is the list built so far, p[2] the NAME
    # value and p[3] the already-built object node.
    name_span = p.lexspan(2)
    name_leaf = create_leaf('name', p[2], span=name_span)

    # The entry covers everything from the start of NAME to the end of object.
    entry_span = (name_span[0], p.lexspan(3)[1])
    entry_node = create_tree('entry', [name_leaf, p[3]], span=entry_span)

    # Concatenate instead of appending in place so p[1] is left untouched.
    p[0] = p[1] + [entry_node]
Esempio n. 2
0
def p_dictionary(p):
    ''' dictionary : DOUBLE_LESS_THAN_SIGN dictionary_entry_list DOUBLE_GREATER_THAN_SIGN '''
    # The docstring above is the grammar production; keep it verbatim.
    #
    # Builds a 'dictionary' tree whose children are the entries in p[2] and
    # whose span runs from the start of the opening delimiter to the end of
    # the closing delimiter.
    entries = p[2]
    span_start = p.lexspan(1)[0]
    span_end = p.lexspan(3)[1]
    p[0] = create_tree('dictionary', entries, span=(span_start, span_end))
Esempio n. 3
0
def p_array(p):
    ''' array : LEFT_SQUARE_BRACKET object_list RIGHT_SQUARE_BRACKET '''
    # The docstring above is the grammar production; keep it verbatim.
    #
    # Builds an 'array' tree from the objects in p[2]. The span is taken from
    # the production itself (symbol 0).
    # NOTE(review): lexspan(0) presumably relies on the parser being run with
    # position tracking enabled -- confirm at the parse() call site.
    elements = p[2]
    whole_span = p.lexspan(0)
    p[0] = create_tree('array', elements, span=whole_span)
Esempio n. 4
0
def p_pdf(p):
    ''' pdf : HEADER pdf_update_list'''
    # The docstring above is the grammar production; keep it verbatim.
    #
    # Builds the root 'pdf' tree: a 'header' leaf made from the HEADER value,
    # followed by every element of the update list in p[2].
    header_leaf = create_leaf('header', p[1], span=p.lexspan(1))
    children = [header_leaf] + p[2]
    p[0] = create_tree(
        'pdf',
        children,
        span=p.lexspan(0),
        version="OPAF!",
    )
Esempio n. 5
0
def p_pdf_brute_end(p):
    ''' pdf_brute_end : XREF TRAILER  dictionary STARTXREF EOF'''
    # The docstring above is the grammar production; keep it verbatim.
    #
    # Result (stored in p[0]): a two-element list [xref tree, startxref leaf].
    startxref_begin = p.lexspan(4)[0]

    # The xref tree wraps the trailer dictionary and carries the XREF value as
    # an attribute. Its span is hard-coded to start at 0 and ends just before
    # STARTXREF.
    # NOTE(review): the 0 start looks deliberate for the "brute" rule -- confirm.
    xref_node = create_tree(
        'xref',
        [p[3]],
        span=(0, startxref_begin - 1),
        xref=p[1],
    )

    # The startxref leaf runs from STARTXREF to the end of the whole production.
    production_end = p.lexspan(0)[1]
    startxref_leaf = create_leaf(
        'startxref',
        p[4],
        span=(startxref_begin, production_end),
    )

    p[0] = [xref_node, startxref_leaf]
Esempio n. 6
0
def p_pdf_update(p):
    ''' pdf_update : body xref pdf_end '''
    # The docstring above is the grammar production; keep it verbatim.
    #
    # Builds a 'pdf_update' tree containing every element of the body (p[1])
    # followed by the xref node (p[2]) and the pdf_end node (p[3]).
    children = p[1] + [p[2], p[3]]

    # Start from an inverted sentinel span (huge start, -1 end) so that the
    # expansion below is driven entirely by the children's spans.
    update = create_tree('pdf_update', children, span=(0xffffffff, -1))
    p[0] = update

    # Plain loop: span_expand is called for its side effect only, so there is
    # no reason to collect its return values in a throwaway list.
    for child in children:
        update.span_expand(child.span)
Esempio n. 7
0
def p_xref_common(p):
    ''' xref : XREF TRAILER dictionary '''
    # The docstring above is the grammar production; keep it verbatim.
    #
    # Builds an 'xref' tree with two children: the trailer dictionary (p[3])
    # and a 'data' leaf holding the stringified XREF value. Both the leaf and
    # the tree use the span of the whole production.
    production_span = p.lexspan(0)
    xref_text = str(p[1])
    data_leaf = create_leaf('data', xref_text, span=production_span)
    p[0] = create_tree('xref', [p[3], data_leaf], span=production_span)
Esempio n. 8
0
def p_indirect_object_stream(p):
    ''' indirect_object_stream : OBJ dictionary STREAM_DATA ENDOBJ '''
    # The docstring above is the grammar production; keep it verbatim.
    #
    # Builds indirect_object -> stream -> [dictionary, data].

    # The 'data' leaf span runs from the start of symbol 2 (the dictionary) to
    # the end of symbol 4 (ENDOBJ).
    # NOTE(review): starting at the dictionary rather than at STREAM_DATA may
    # be unintended -- confirm before relying on this span.
    data_span = (p.lexspan(2)[0], p.lexspan(4)[1])
    data_leaf = create_leaf('data', p[3], span=data_span)

    production_span = p.lexspan(0)
    stream_node = create_tree(
        'stream',
        [p[2], data_leaf],
        span=production_span,
    )

    # p[1] is interpolated into two %d fields, so it is presumably a
    # two-integer tuple supplied by the lexer.
    object_id = "%d %d" % p[1]
    p[0] = create_tree(
        'indirect_object',
        [stream_node],
        span=production_span,
        id=object_id,
    )
Esempio n. 9
0
def p_indirect_object(p):
    ''' indirect_object : OBJ object ENDOBJ '''
    # The docstring above is the grammar production; keep it verbatim.
    #
    # Wraps the parsed object (p[2]) in an 'indirect_object' tree tagged with
    # an id string built from the OBJ value.

    # p[1] is interpolated into two %d fields, so it is presumably a
    # two-integer tuple supplied by the lexer.
    object_id = "%d %d" % p[1]
    wrapped = [p[2]]
    p[0] = create_tree(
        'indirect_object',
        wrapped,
        span=p.lexspan(0),
        id=object_id,
    )
Esempio n. 10
0
        if len(xml_pdf_ends) == 0:
            logger.info("%%%%EOF tag was not found! Creating a dummy.")
            dummy_startxref = create_leaf('startxref', -1, span=(len(pdf),len(pdf)))
            print dummy_startxref.value
            allobjects.append(dummy_startxref)

        if len(xml_headers) == 0:
            logger.info("%%%%PDF-N-M tag was not found! Creating a dummy.")
            allobjects.append(create_leaf('header', "NOVERSION", span=(0,0)))

        #Sort it as they appear in the file
        allobjects = sorted(allobjects,lambda x,y: cmp(x.span[0], y.span[0]))

        #recreate XML structure 'best' we can...
        assert allobjects[0].tag == 'header'
        root_element = create_tree('pdf', [allobjects.pop(0)], span=(0,len(pdf)), version="OPAF!(raw)")
        
        update = create_tree('pdf_update', [],span=(0xfffffff,-1))
        while len(allobjects)>0:
            thing = allobjects.pop(0)
            update.append(thing)
            update.span_expand(thing.span)
            if thing.tag == 'startxref':
                root_element.append(update)
                update = create_tree('pdf_update',[],span=(0xfffffff,-1))

        if len(update)>0:
            logger.info("Missing ending %%EOF")
            root_element.append(update)

        return root_element