def check(args):
    """Print an integrity report for the xref table of ``args.pdffile``.

    Two counters are reported: the number of xref entries that point to
    the same offset, and the number of duplicated contents, both as
    returned by the document's ``xref`` object.
    """
    xref = doc.PdfDocument(args.pdffile).xref

    # Single-argument print(...) is parsed identically by the Python 2
    # print statement this file relies on, so each line is built as one
    # space-separated string.
    print('### CHECKING XREF INTEGRITY ###')

    print('Looking for duplicated references ...')
    duplicated_offsets = xref.getNumberOfDuplicatedOffsets()
    print(' '.join(['Found', str(duplicated_offsets),
                    'entries pointing to the same offset']))

    print('Looking for duplicated streams ...')
    duplicated_contents = xref.getNumberOfDuplicatedContents()
    print(' '.join(['found', str(duplicated_contents),
                    'duplicated contents']))
def graph(args):
    """Print the object graph of ``args.pdffile``, or one component of it.

    ``args.fromObject`` takes precedence: when set, the graph component
    for that object number is printed.  Otherwise, when ``args.toObject``
    is set, the reversed graph component for that object number is
    printed.  With neither set, the full graph is printed.
    """
    document = doc.PdfDocument(args.pdffile)
    start_object = args.fromObject
    end_object = args.toObject
    object_graph = objectGraph.ObjectGraph(document)

    if start_object:
        rendering = object_graph.getGraphComponent(int(start_object))
    elif end_object:
        rendering = object_graph.getReversedGraphComponent(int(end_object))
    else:
        rendering = object_graph.getFullGraph()

    print(rendering)
def replace(args):
    """Replace one object of a PDF and write the result to a new file.

    Reads the replacement data from ``args.input``, loads the PDF given
    by ``args.pdffile``, substitutes the content of object
    ``args.objectnumber`` and writes the modified document to
    ``args.output``.

    When ``args.stream`` is true only the stream content of the object is
    replaced (via ``setStreamContent``); otherwise the whole object
    content is replaced (via ``setContent``).

    Raises:
        ValueError: if ``args.objectnumber`` is not a valid integer.
        IOError: if the input or output file cannot be opened.
    """
    objectnumber = int(args.objectnumber)
    replace_stream = args.stream

    # PDF data is binary: read it untranslated, and close the handle as
    # soon as the replacement payload is in memory.
    with open(args.input, 'rb') as input_file:
        content = input_file.read()

    document = doc.PdfDocument(args.pdffile)
    entry = document.xrefEntry(objectnumber)
    if replace_stream:
        entry.setStreamContent(document.filestream, content)
    else:
        entry.setContent(content)

    # Open the output only once there is something to write, so a failure
    # while loading or patching the document does not leave behind a
    # truncated file; 'wb' avoids newline translation of the PDF bytes.
    with open(args.output, 'wb') as output_file:
        document.writeTo(output_file)
def show(args):
    """Print the content of the objects selected by ``args.objectnumber``.

    ``args.objectnumber`` is expanded with ``splitArg``.  For each
    resulting number, the object's stream is printed when ``args.stream``
    is true, otherwise the content of its xref entry.  Nothing is printed
    when ``args.objectnumber`` is empty.

    If fetching a stream fails, a message is printed and the function
    returns without processing the remaining objects.
    """
    pdf = doc.PdfDocument(args.pdffile)

    requested = args.objectnumber
    if not requested:
        return

    object_numbers = splitArg(requested)
    want_stream = args.stream

    for object_number in object_numbers:
        if not want_stream:
            content = pdf.fetchXref(object_number).content
        else:
            try:
                content = pdf.fetchStream(object_number)
            except Exception:
                print("Object doesn't seem to be a stream")
                return
        # Python 2 print statement: the trailing comma suppresses the
        # newline, so consecutive objects are written back to back.
        print(content),
def showxref(args):
    """Print the string form of the xref table of ``args.pdffile``."""
    pdf = doc.PdfDocument(args.pdffile)
    print('### XREF ###')
    xref_text = str(pdf.xref)
    print(xref_text)
def parse_pdf(self, f):
    """Load ``f`` as a PDF document and store it in ``self.pdfdoc``.

    A progress callback is handed to ``doc.PdfDocument``.  On every call
    it copies the status text into ``self.status_sync_var`` and the
    completion percentage into ``self.progress_sync_var``.  The callback
    expects the keyword arguments ``read`` and ``total``.
    """
    def report_progress(status, **kwargs):
        self.status_sync_var.set(status)
        read = kwargs['read']
        total = kwargs['total']
        # Multiply before dividing: with integer counters, read / total
        # truncates to 0 under Python 2 until the very last update.
        # Floor division keeps the result an int.  A zero total is
        # reported as 0 percent instead of raising ZeroDivisionError.
        percent = read * 100 // total if total else 0
        self.progress_sync_var.set(percent)

    self.pdfdoc = doc.PdfDocument(f, progress_cb=report_progress)