Beispiel #1
0
    def outlines_caj(self):
        """Add the outline of the selected CAJ file to the default output PDF.

        Progress, the output path and any error are reported through
        ``self.log``; the run is always terminated by a separator line.
        Nothing happens beyond logging when ``self.check_file_exist()``
        returns ``False``.
        """
        self.log('outlines')
        # Only an explicit False from the check aborts the run.
        if self.check_file_exist() is not False:
            try:
                outline = CAJParser(self.line_address.text()).get_toc()
                # The outlined copy goes to a scratch file first and then
                # takes the place of the output file.
                add_outlines(outline, self.getDefaultOutput(), "tmp.pdf")
                replace("tmp.pdf", self.getDefaultOutput())
                self.log('output: %s' % self.getDefaultOutput())
            except Exception as err:
                self.log('Exception: %s' % err)
        self.log('--------')
Beispiel #2
0
def addindex():
	"""Copy the outline of the selected CAJ file into the selected PDF.

	Reads the module globals ``origincaj`` and ``originpdf``, writes the
	outlined copy to ``tmp.pdf`` next to the PDF and then replaces the PDF
	with it. The outcome is reported to the user through a message box.
	"""
	try:
		cajadd = CAJParser(origincaj)
		toc = cajadd.get_toc()
		# os.path.join yields 'tmp.pdf' when the PDF path has no directory
		# part; plain '/' concatenation produced '/tmp.pdf' (filesystem root).
		tmp = os.path.join(os.path.dirname(originpdf), 'tmp.pdf')
		print(tmp)
		add_outlines(toc, originpdf, tmp)
		os.replace(tmp, originpdf)
	except (Exception, SystemExit):
		# NOTE(review): SystemExit stays caught because the original bare
		# ``except`` caught it and the parser may signal failures that way --
		# confirm. KeyboardInterrupt is no longer swallowed.
		messagebox.showinfo('错误','请检查是否选中caj或pdf')
	else:
		# Reported outside the try so a dialog failure is not mistaken for a
		# conversion error.
		messagebox.showinfo('提示','完成')
Beispiel #3
0
def addindex():
    """Copy the outline of the selected CAJ file into the selected PDF.

    Reads the module globals ``origincaj`` and ``originpdf``, writes the
    outlined copy to ``tmp.pdf`` next to the PDF and then replaces the PDF
    with it. The outcome is reported to the user through a message box.
    """
    try:
        cajadd = CAJParser(origincaj)
        toc = cajadd.get_toc()
        # os.path.join yields 'tmp.pdf' when the PDF path has no directory
        # part; plain '/' concatenation produced '/tmp.pdf' (filesystem root).
        tmp = os.path.join(os.path.dirname(originpdf), 'tmp.pdf')
        print(tmp)
        add_outlines(toc, originpdf, tmp)
        os.replace(tmp, originpdf)
    except (Exception, SystemExit):
        # NOTE(review): SystemExit stays caught because the original bare
        # ``except`` caught it and the parser may signal failures that way --
        # confirm. KeyboardInterrupt is no longer swallowed.
        messagebox.showinfo('错误', '请检查是否选中caj或pdf')
    else:
        # Reported outside the try so a dialog failure is not mistaken for a
        # conversion error.
        messagebox.showinfo('提示', '完成')
Beispiel #4
0
    def outlines_caj(self):
        """Add the outline of the chosen CAJ file to the chosen PDF in place.

        Both paths are validated with ``self.check_file_exist`` (CAJ first,
        PDF only if the CAJ check did not fail). Progress, the output path
        and any error are reported through ``self.log``; the run always ends
        with a separator line.
        """
        self.log('outlines')
        # ``and`` short-circuits, so the PDF is not checked once the CAJ
        # check has returned False.
        inputs_ok = (
            self.check_file_exist(self.caj_address.text(), '.caj') is not False
            and self.check_file_exist(self.pdf_address.text(), '.pdf') is not False
        )
        if inputs_ok:
            try:
                outline = CAJParser(self.caj_address.text()).get_toc()
                # Write the outlined copy to a scratch file, then move it
                # over the original PDF.
                add_outlines(outline, self.pdf_address.text(), "tmp.pdf")
                move("tmp.pdf", self.pdf_address.text())
                self.log('output: %s' % self.pdf_address.text())
            except Exception as err:
                self.log('Exception: %s' % err)
        self.log('--------')
Beispiel #5
0
    def _convert_caj(self, dest):
        """Rebuild a standalone PDF from the PDF objects stored in ``self.filename``.

        Steps, each working on the scratch file ``pdf.tmp`` in the current
        working directory:

        1. copy the embedded object data out of the CAJ file and prepend a
           ``%PDF-1.3`` header;
        2. keep only the first occurrence of every object number;
        3. append a Catalog object that points at the root Pages object;
        4. synthesise every Pages object that is referenced through
           ``/Parent`` but absent from the data;
        5. let ``./mutool clean`` write a repaired copy to ``pdf_toc.pdf``;
        6. produce ``dest`` via ``add_outlines`` using ``self.get_toc()``.

        Both scratch files are deleted after a successful run.

        Args:
            dest: Output path handed to ``add_outlines``.

        Raises:
            SystemExit: If ``mutool`` exits with a non-zero status.
        """
        # Extract original PDF data (and add header).  The CAJ handle is only
        # needed for this step, so it is closed as soon as the data is read.
        with open(self.filename, "rb") as caj:
            caj.seek(self._PAGE_NUMBER_OFFSET + 4)
            [pdf_start_pointer] = struct.unpack("i", caj.read(4))
            caj.seek(pdf_start_pointer)
            [pdf_start] = struct.unpack("i", caj.read(4))
            # presumably fnd_all yields match start offsets, so +6
            # (len(b"endobj")) includes the last terminator -- TODO confirm
            pdf_end = fnd_all(caj, b"endobj")[-1] + 6
            pdf_length = pdf_end - pdf_start
            caj.seek(pdf_start)
            pdf_data = b"%PDF-1.3\r\n" + caj.read(pdf_length) + b"\r\n"
        with open("pdf.tmp", 'wb') as f:
            f.write(pdf_data)
        pdf = open("pdf.tmp", "rb")

        # deal with disordered PDF data
        endobj_addr = fnd_all(pdf, b"endobj")
        pdf_data = b"%PDF-1.3\r\n"
        obj_no = []
        for addr in endobj_addr:
            startobj = fnd_rvrs(pdf, b" 0 obj", addr)
            # The object header line may be preceded by either \r or \n; use
            # whichever position is larger.
            startobj1 = fnd_rvrs(pdf, b"\r", startobj)
            startobj2 = fnd_rvrs(pdf, b"\n", startobj)
            startobj = max(startobj1, startobj2)
            length = fnd(pdf, b" ", startobj) - startobj
            pdf.seek(startobj)
            [no] = struct.unpack(str(length) + "s", pdf.read(length))
            # Only the first object seen for a given number is kept.
            if int(no) not in obj_no:
                obj_no.append(int(no))
                obj_len = addr - startobj + 6
                pdf.seek(startobj)
                [obj] = struct.unpack(str(obj_len) + "s", pdf.read(obj_len))
                pdf_data += (b"\r" + obj)
        pdf_data += b"\r\n"
        # Close the read handle before the scratch file is rewritten.
        pdf.close()
        with open("pdf.tmp", 'wb') as f:
            f.write(pdf_data)
        pdf = open("pdf.tmp", "rb")

        # Add Catalog (find obj_no of pages)
        # +8 skips over "/Parent " so addr points at the referenced number.
        inds_addr = [i + 8 for i in fnd_all(pdf, b"/Parent")]
        inds = []
        for addr in inds_addr:
            length = fnd(pdf, b" ", addr) - addr
            pdf.seek(addr)
            [ind] = struct.unpack(str(length) + "s", pdf.read(length))
            inds.append(int(ind))
        # get pages_obj_no list containing distinct elements
        # & find missing pages object(s) -- top pages object(s) in pages_obj_no
        pages_obj_no = []
        top_pages_obj_no = []
        for ind in inds:
            if (ind not in pages_obj_no) and (ind not in top_pages_obj_no):
                # fnd returning -1 means the referenced parent is not defined.
                if fnd(pdf, bytes("\r{0} 0 obj".format(ind), "utf-8")) == -1:
                    top_pages_obj_no.append(ind)
                else:
                    pages_obj_no.append(ind)
        single_pages_obj_missed = len(top_pages_obj_no) == 1
        multi_pages_obj_missed = len(top_pages_obj_no) > 1
        # generate catalog object
        catalog_obj_no = fnd_unuse_no(obj_no, top_pages_obj_no)
        obj_no.append(catalog_obj_no)
        root_pages_obj_no = None
        if multi_pages_obj_missed:
            root_pages_obj_no = fnd_unuse_no(obj_no, top_pages_obj_no)
        elif single_pages_obj_missed:
            root_pages_obj_no = top_pages_obj_no[0]
            top_pages_obj_no = pages_obj_no
        else:  # root pages object exists, then find the root pages object #
            found = False
            for pon in pages_obj_no:
                tmp_addr = fnd(pdf, bytes("\r{0} 0 obj".format(pon), 'utf-8'))
                # Scan byte by byte: reaching "endobj" before any "Parent"
                # means this pages object has no parent, i.e. it is the root.
                while True:
                    pdf.seek(tmp_addr)
                    [_str] = struct.unpack("6s", pdf.read(6))
                    if _str == b"Parent":
                        break
                    elif _str == b"endobj":
                        root_pages_obj_no = pon
                        found = True
                        break
                    tmp_addr = tmp_addr + 1
                if found:
                    break
        catalog = bytes(
            "{0} 0 obj\r<</Type /Catalog\r/Pages {1} 0 R\r>>\rendobj\r".format(
                catalog_obj_no, root_pages_obj_no), "utf-8")
        pdf_data += catalog
        pdf.close()
        with open("pdf.tmp", 'wb') as f:
            f.write(pdf_data)
        pdf = open("pdf.tmp", "rb")

        # Add Pages obj and EOF mark
        # if root pages object exist, pass
        # deal with single missing pages object
        if single_pages_obj_missed or multi_pages_obj_missed:
            inds_str = ["{0} 0 R".format(i) for i in top_pages_obj_no]
            kids_str = "[{0}]".format(" ".join(inds_str))
            pages_str = "{0} 0 obj\r<<\r/Type /Pages\r/Kids {1}\r/Count {2}\r>>\rendobj\r".format(
                root_pages_obj_no, kids_str, self.page_num)
            pdf_data += bytes(pages_str, "utf-8")
            pdf.close()
            with open("pdf.tmp", 'wb') as f:
                f.write(pdf_data)
            pdf = open("pdf.tmp", "rb")
        # deal with multiple missing pages objects
        if multi_pages_obj_missed:
            kids_dict = {i: [] for i in top_pages_obj_no}
            count_dict = {i: 0 for i in top_pages_obj_no}
            for tpon in top_pages_obj_no:
                kids_addr = fnd_all(
                    pdf, bytes("/Parent {0} 0 R".format(tpon), "utf-8"))
                for kid in kids_addr:
                    ind = fnd_rvrs(pdf, b"obj", kid) - 4
                    addr = fnd_rvrs(pdf, b"\r", ind)
                    length = fnd(pdf, b" ", addr) - addr
                    pdf.seek(addr)
                    [ind] = struct.unpack(str(length) + "s", pdf.read(length))
                    kids_dict[tpon].append(int(ind))
                    type_addr = fnd(pdf, b"/Type", addr) + 5
                    tmp_addr = fnd(pdf, b"/", type_addr) + 1
                    pdf.seek(tmp_addr)
                    [_type] = struct.unpack("5s", pdf.read(5))
                    if _type == b"Pages":
                        # A nested Pages node contributes its own /Count;
                        # read digits up to the next delimiter.
                        cnt_addr = fnd(pdf, b"/Count ", addr) + 7
                        pdf.seek(cnt_addr)
                        [_str] = struct.unpack("1s", pdf.read(1))
                        cnt_len = 0
                        while _str not in [b" ", b"\r", b"/"]:
                            cnt_len += 1
                            pdf.seek(cnt_addr + cnt_len)
                            [_str] = struct.unpack("1s", pdf.read(1))
                        pdf.seek(cnt_addr)
                        [cnt] = struct.unpack(
                            str(cnt_len) + "s", pdf.read(cnt_len))
                        count_dict[tpon] += int(cnt)
                    else:  # _type == b"Page"
                        count_dict[tpon] += 1
                kids_no_str = ["{0} 0 R".format(i) for i in kids_dict[tpon]]
                kids_str = "[{0}]".format(" ".join(kids_no_str))
                pages_str = "{0} 0 obj\r<<\r/Type /Pages\r/Kids {1}\r/Count {2}\r>>\rendobj\r".format(
                    tpon, kids_str, count_dict[tpon])
                pdf_data += bytes(pages_str, "utf-8")
        pdf_data += bytes("\n%%EOF\r", "utf-8")
        # Parsing is finished; release the handle before the final rewrite
        # and before mutool reads the file.
        pdf.close()
        with open("pdf.tmp", 'wb') as f:
            f.write(pdf_data)

        # Use mutool to repair xref
        try:
            check_output(["./mutool", "clean", "pdf.tmp", "pdf_toc.pdf"],
                         stderr=STDOUT)
        except CalledProcessError as e:
            print(e.output.decode("utf-8"))
            raise SystemExit("Command mutool returned non-zero exit status " +
                             str(e.returncode))

        # Add Outlines
        add_outlines(self.get_toc(), "pdf_toc.pdf", dest)
        os.remove("pdf.tmp")
        os.remove("pdf_toc.pdf")
Beispiel #6
0
    def _convert_hn(self, dest):
        """Convert the HN-format file ``self.filename`` into a PDF at ``dest``.

        For each of ``self.page_num`` pages a 20-byte record (starting at
        ``self._TOC_END_OFFSET``) gives the page data offset, the size of the
        text section and the number of images. The text section (zlib
        compressed when its header carries ``COMPRESSTEXT``) is parsed with
        ``HNParsePage``; the images that follow it are decoded according to
        their type and collected. The collected images are turned into a PDF
        with ``convert_ImageList``, written to the scratch file
        ``pdf_toc.pdf``, and ``add_outlines`` produces ``dest`` from it.

        Args:
            dest: Output path handed to ``add_outlines``.

        Raises:
            SystemExit: On a decompressed-size mismatch, an unexpected image
                offset, an unknown image type, or when no image was found.
        """
        image_list = []

        from pdfwutils import Colorspace, ImageFormat, convert_ImageList
        from HNParsePage import HNParsePage
        import zlib

        # The CAJ handle is closed as soon as all pages have been read.
        with open(self.filename, "rb") as caj:
            for i in range(self.page_num):
                caj.seek(self._TOC_END_OFFSET + i * 20)
                [
                    page_data_offset, size_of_text_section, images_per_page,
                    page_no, unk2, next_page_data_offset
                ] = struct.unpack("iihhii", caj.read(20))
                caj.seek(page_data_offset)
                text_header_read32 = caj.read(32)
                if (text_header_read32[8:20] == b'COMPRESSTEXT'):
                    [expanded_text_size] = struct.unpack(
                        "i", text_header_read32[20:24])
                    # Compressed payload starts after the 24-byte header.
                    caj.seek(page_data_offset + 24)
                    data = caj.read(size_of_text_section - 24)
                    output = zlib.decompress(data, bufsize=expanded_text_size)
                    if (len(output) != expanded_text_size):
                        raise SystemExit("Unexpected:", len(output),
                                         expanded_text_size)
                else:
                    caj.seek(page_data_offset)
                    output = caj.read(size_of_text_section)
                page_style = (next_page_data_offset > page_data_offset)
                page_data = HNParsePage(output, page_style)

                # Pages with several images get a None marker followed by
                # the figure list ahead of the image items.
                # NOTE(review): presumably convert_ImageList interprets this
                # pair as placement info -- confirm.
                if (images_per_page > 1):
                    if (len(page_data.figures) == images_per_page):
                        image_list.append(None)
                        image_list.append(page_data.figures)
                    else:
                        print("Page %d, Image Count %d != %d" %
                              (i + 1, len(page_data.figures), images_per_page))
                        image_list.append(None)
                        image_list.append(
                            page_data.figures[0:images_per_page])
                current_offset = page_data_offset + size_of_text_section
                for j in range(images_per_page):
                    caj.seek(current_offset)
                    read32 = caj.read(32)
                    [image_type_enum, offset_to_image_data,
                     size_of_image_data] = struct.unpack("iii", read32[0:12])
                    # Image data is expected right after its 12-byte header.
                    if (offset_to_image_data != current_offset + 12):
                        raise SystemExit("unusual image offset")
                    caj.seek(offset_to_image_data)
                    image_data = caj.read(size_of_image_data)
                    current_offset = offset_to_image_data + size_of_image_data
                    if (image_type[image_type_enum] == "JBIG"):
                        from jbigdec import CImage
                        cimage = CImage(image_data)
                        out = cimage.DecodeJbig()
                        # PBM is only padded to 8 rather than 32.
                        # If the padding is larger, write padded file.
                        width = cimage.width
                        if (cimage.bytes_per_line > ((cimage.width + 7) >> 3)):
                            width = cimage.bytes_per_line << 3
                        image_item = (Colorspace.P, (300, 300),
                                      ImageFormat.PBM, zlib.compress(out),
                                      width, cimage.height, [0xffffff, 0],
                                      False, 1, 0)
                    elif (image_type[image_type_enum] == "JBIG2"):
                        from jbig2dec import CImage
                        cimage = CImage(image_data)
                        out = cimage.DecodeJbig2()
                        # PBM is only padded to 8 rather than 32.
                        # If the padding is larger, write padded file.
                        width = cimage.width
                        if (cimage.bytes_per_line > ((cimage.width + 7) >> 3)):
                            width = cimage.bytes_per_line << 3
                        image_item = (Colorspace.P, (300, 300),
                                      ImageFormat.PBM, zlib.compress(out),
                                      width, cimage.height, [0xffffff, 0],
                                      False, 1, 0)
                    elif (image_type[image_type_enum] == "JPEG"):
                        # stock libjpeg location
                        (SOFn, frame_length, bits_per_pixel, height,
                         width) = struct.unpack(">HHBHH", image_data[158:167])
                        if (SOFn != 0xFFC0):
                            # "Intel(R) JPEG Library" location
                            (SOFn, frame_length, bits_per_pixel, height,
                             width) = struct.unpack(">HHBHH",
                                                    image_data[0x272:0x27b])
                            if (SOFn != 0xFFC0):
                                # neither works, try brute-force
                                import imagesize
                                with open(".tmp.jpg", "wb") as f:
                                    f.write(image_data)
                                # Measure and delete only after the file is
                                # closed: the data is then flushed to disk
                                # and the delete also works on Windows.
                                (width, height) = imagesize.get(".tmp.jpg")
                                os.remove(".tmp.jpg")
                        if (image_type_enum == 1):
                            # non-inverted JPEG Images
                            height = -height
                        image_item = (Colorspace.RGB, (300, 300),
                                      ImageFormat.JPEG, image_data, width,
                                      height, [], False, 8, 0)
                    else:
                        raise SystemExit("Unknown Image Type %d" %
                                         (image_type_enum))
                    image_list.append(image_item)
        if (len(image_list) == 0):
            raise SystemExit("File is pure-text HN; cannot convert to pdf")
        pdf_data = convert_ImageList(image_list)
        with open('pdf_toc.pdf', 'wb') as f:
            f.write(pdf_data)
        # Add Outlines
        add_outlines(self.get_toc(), "pdf_toc.pdf", dest)
        os.remove("pdf_toc.pdf")
Beispiel #7
0
                format(args.input, caj.format, caj.page_num, caj.toc_num))

    # Dispatch on the selected sub-command; every branch builds its own
    # CAJParser from args.input.
    if args.command == "convert":
        caj = CAJParser(args.input)
        # Derive an output name when none was given.
        if args.output is None:
            if args.input.endswith(".caj"):
                # NOTE(review): str.replace swaps every ".caj" occurrence in
                # the path, not only the trailing extension.
                args.output = args.input.replace(".caj", ".pdf")
            elif (len(args.input) > 4
                  and (args.input[-4] == '.' or args.input[-3] == '.')
                  and not args.input.endswith(".pdf")):
                # A two- or three-character extension other than ".pdf" is
                # swapped for ".pdf".
                args.output = os.path.splitext(args.input)[0] + ".pdf"
            else:
                # Otherwise ".pdf" is appended to the input name.
                args.output = args.input + ".pdf"
        caj.convert(args.output)

    if args.command == "outlines":
        caj = CAJParser(args.input)
        # Outlines are refused for these two formats.
        if caj.format == "PDF" or caj.format == "KDH":
            raise SystemExit("Unsupported file type: {0}.".format(caj.format))
        toc = caj.get_toc()
        # Write the outlined copy to a scratch file in the working directory,
        # then put it in place of args.output.
        add_outlines(toc, args.output, "tmp.pdf")
        os.replace("tmp.pdf", args.output)

    if args.command == "text-extract":
        caj = CAJParser(args.input)
        caj.text_extract()

    if args.command == "parse":
        caj = CAJParser(args.input)
        caj.parse()
Beispiel #8
0
    def _convert_caj(self, dest):
        """Rebuild a standalone PDF from the PDF objects stored in ``self.filename``.

        The embedded object data is copied into the scratch file ``pdf.tmp``
        (current working directory) behind a ``%PDF-1.3`` header, a Catalog
        object numbered 1 is appended that points at the smallest object
        number referenced through ``/Parent``, and that Pages object is
        synthesised when the data does not define it. ``mutool clean`` then
        writes ``pdf_toc.pdf`` and ``add_outlines`` produces ``dest`` using
        ``self.get_toc()``. Both scratch files are removed afterwards.

        Args:
            dest: Output path handed to ``add_outlines``.
        """
        import os

        # Extract original PDF data (and add header).  The CAJ handle is only
        # needed for this step, so it is closed as soon as the data is read.
        with open(self.filename, "rb") as caj:
            caj.seek(self._PAGE_NUMBER_OFFSET + 4)
            [pdf_start_pointer] = struct.unpack("i", caj.read(4))
            caj.seek(pdf_start_pointer)
            [pdf_start] = struct.unpack("i", caj.read(4))
            pdf_end = fnd_all(caj, b"endobj")[-1] + 6
            pdf_length = pdf_end - pdf_start
            caj.seek(pdf_start)
            pdf_data = b"%PDF-1.3\r\n" + caj.read(pdf_length) + b"\r\n"
        with open("pdf.tmp", 'wb') as f:
            f.write(pdf_data)
        pdf = open("pdf.tmp", "rb")

        # Add Catalog (find obj_no of pages)
        # +8 skips over "/Parent " so addr points at the referenced number.
        inds_addr = [i + 8 for i in fnd_all(pdf, b"/Parent")]
        inds = []
        for addr in inds_addr:
            pdf.seek(addr)
            length = 0
            # Count bytes up to the next space: the length of the number.
            while True:
                [s] = struct.unpack("s", pdf.read(1))
                if s == b" ":
                    break
                else:
                    length += 1
                    pdf.seek(addr + length)
            pdf.seek(addr)
            [ind] = struct.unpack(str(length) + "s", pdf.read(length))
            inds.append(int(ind))
        pages_obj_no = min(inds)
        catalog = bytes(
            "1 0 obj\r<</Type /Catalog\r/Pages {0} 0 R\r>>\rendobj\r".format(
                pages_obj_no), "utf-8")
        pdf_data += catalog
        # Close the read handle before the scratch file is rewritten.
        pdf.close()
        with open("pdf.tmp", 'wb') as f:
            f.write(pdf_data)
        pdf = open("pdf.tmp", "rb")

        # Add Pages obj and EOF mark
        # fnd returning -1 means the Pages object is not defined in the data.
        if fnd(pdf, bytes("\r{0} 0 obj\r<<".format(pages_obj_no),
                          "utf-8")) == -1:
            kids_addr = fnd_all(
                pdf, bytes("/Parent {0} 0 R".format(pages_obj_no), "utf-8"))
            inds_addr = []
            for kid in kids_addr:
                # Walk backwards to the "obj\r<<" that opens the kid object ...
                ind = kid - 6
                while True:
                    pdf.seek(ind)
                    [obj_str] = struct.unpack("6s", pdf.read(6))
                    if obj_str == b"obj\r<<":
                        break
                    else:
                        ind = ind - 1
                # ... then further back to the \r that starts its header line.
                ind -= 1
                pdf.seek(ind)
                while True:
                    [s] = struct.unpack("s", pdf.read(1))
                    if s == b"\r":
                        break
                    else:
                        ind -= 1
                        pdf.seek(ind)
                inds_addr.append(ind + 1)
            inds = []
            for addr in inds_addr:
                pdf.seek(addr)
                length = 0
                while True:
                    [s] = struct.unpack("s", pdf.read(1))
                    if s == b" ":
                        break
                    else:
                        length += 1
                        pdf.seek(addr + length)
                pdf.seek(addr)
                [ind] = struct.unpack(str(length) + "s", pdf.read(length))
                inds.append(int(ind))
            inds_str = ["{0} 0 R".format(i) for i in inds]
            kids_str = "[{0}]".format(" ".join(inds_str))
            pages_str = "{0} 0 obj\r<<\r/Type /Pages\r/Kids {1}\r/Count {2}\r>>\rendobj".format(
                pages_obj_no, kids_str, self.page_num)
            pdf_data += bytes(pages_str, "utf-8")
        pdf_data += bytes("\r\n%%EOF\r", "utf-8")
        # Parsing is finished; release the handle so the scratch file can be
        # rewritten, read by mutool and deleted afterwards.
        pdf.close()
        with open("pdf.tmp", 'wb') as f:
            f.write(pdf_data)

        # Use mutool to repair xref
        call(["mutool", "clean", "pdf.tmp", "pdf_toc.pdf"])

        # Add Outlines
        add_outlines(self.get_toc(), "pdf_toc.pdf", dest)
        # Portable, best-effort cleanup (same spirit as ``rm -f``): a missing
        # or undeletable scratch file must not fail the conversion.
        for scratch in ("pdf.tmp", "pdf_toc.pdf"):
            try:
                os.remove(scratch)
            except OSError:
                pass
Beispiel #9
0
    def _convert_caj(self, dest):
        """Rebuild a standalone PDF from the PDF objects stored in ``self.filename``.

        Steps, each working on the scratch file ``pdf.tmp`` in the current
        working directory:

        1. copy the embedded object data out of the CAJ file and prepend a
           ``%PDF-1.3`` header;
        2. keep only the first occurrence of every object number;
        3. append a Catalog object that points at the root Pages object;
        4. synthesise every Pages object that is referenced through
           ``/Parent`` but absent from the data;
        5. let ``mutool clean`` write a repaired copy to ``pdf_toc.pdf``;
        6. produce ``dest`` via ``add_outlines`` using ``self.get_toc()``.

        Both scratch files are deleted after a successful run.

        Args:
            dest: Output path handed to ``add_outlines``.

        Raises:
            SystemExit: If ``mutool`` exits with a non-zero status.
        """
        # Extract original PDF data (and add header).  The CAJ handle is only
        # needed for this step, so it is closed as soon as the data is read.
        with open(self.filename, "rb") as caj:
            caj.seek(self._PAGE_NUMBER_OFFSET + 4)
            [pdf_start_pointer] = struct.unpack("i", caj.read(4))
            caj.seek(pdf_start_pointer)
            [pdf_start] = struct.unpack("i", caj.read(4))
            # presumably fnd_all yields match start offsets, so +6
            # (len(b"endobj")) includes the last terminator -- TODO confirm
            pdf_end = fnd_all(caj, b"endobj")[-1] + 6
            pdf_length = pdf_end - pdf_start
            caj.seek(pdf_start)
            pdf_data = b"%PDF-1.3\r\n" + caj.read(pdf_length) + b"\r\n"
        with open("pdf.tmp", 'wb') as f:
            f.write(pdf_data)
        pdf = open("pdf.tmp", "rb")

        # deal with disordered PDF data
        endobj_addr = fnd_all(pdf, b"endobj")
        pdf_data = b"%PDF-1.3\r\n"
        obj_no = []
        for addr in endobj_addr:
            startobj = fnd_rvrs(pdf, b" 0 obj", addr)
            # The object header line may be preceded by either \r or \n; use
            # whichever position is larger.
            startobj1 = fnd_rvrs(pdf, b"\r", startobj)
            startobj2 = fnd_rvrs(pdf, b"\n", startobj)
            startobj = max(startobj1, startobj2)
            length = fnd(pdf, b" ", startobj) - startobj
            pdf.seek(startobj)
            [no] = struct.unpack(str(length) + "s", pdf.read(length))
            # Only the first object seen for a given number is kept.
            if int(no) not in obj_no:
                obj_no.append(int(no))
                obj_len = addr - startobj + 6
                pdf.seek(startobj)
                [obj] = struct.unpack(str(obj_len) + "s", pdf.read(obj_len))
                pdf_data += (b"\r" + obj)
        pdf_data += b"\r\n"
        # Close the read handle before the scratch file is rewritten.
        pdf.close()
        with open("pdf.tmp", 'wb') as f:
            f.write(pdf_data)
        pdf = open("pdf.tmp", "rb")

        # Add Catalog (find obj_no of pages)
        # +8 skips over "/Parent " so addr points at the referenced number.
        inds_addr = [i + 8 for i in fnd_all(pdf, b"/Parent")]
        inds = []
        for addr in inds_addr:
            length = fnd(pdf, b" ", addr) - addr
            pdf.seek(addr)
            [ind] = struct.unpack(str(length) + "s", pdf.read(length))
            inds.append(int(ind))
        # get pages_obj_no list containing distinct elements
        # & find missing pages object(s) -- top pages object(s) in pages_obj_no
        pages_obj_no = []
        top_pages_obj_no = []
        for ind in inds:
            if (ind not in pages_obj_no) and (ind not in top_pages_obj_no):
                # fnd returning -1 means the referenced parent is not defined.
                if fnd(pdf, bytes("\r{0} 0 obj".format(ind), "utf-8")) == -1:
                    top_pages_obj_no.append(ind)
                else:
                    pages_obj_no.append(ind)
        single_pages_obj_missed = len(top_pages_obj_no) == 1
        multi_pages_obj_missed = len(top_pages_obj_no) > 1
        # generate catalog object
        catalog_obj_no = fnd_unuse_no(obj_no, top_pages_obj_no)
        obj_no.append(catalog_obj_no)
        root_pages_obj_no = None
        if multi_pages_obj_missed:
            root_pages_obj_no = fnd_unuse_no(obj_no, top_pages_obj_no)
        elif single_pages_obj_missed:
            root_pages_obj_no = top_pages_obj_no[0]
            top_pages_obj_no = pages_obj_no
        else:  # root pages object exists, then find the root pages object #
            found = False
            for pon in pages_obj_no:
                tmp_addr = fnd(pdf, bytes("\r{0} 0 obj".format(pon), 'utf-8'))
                # Scan byte by byte: reaching "endobj" before any "Parent"
                # means this pages object has no parent, i.e. it is the root.
                while True:
                    pdf.seek(tmp_addr)
                    [_str] = struct.unpack("6s", pdf.read(6))
                    if _str == b"Parent":
                        break
                    elif _str == b"endobj":
                        root_pages_obj_no = pon
                        found = True
                        break
                    tmp_addr = tmp_addr + 1
                if found:
                    break
        catalog = bytes("{0} 0 obj\r<</Type /Catalog\r/Pages {1} 0 R\r>>\rendobj\r".format(
            catalog_obj_no, root_pages_obj_no), "utf-8")
        pdf_data += catalog
        pdf.close()
        with open("pdf.tmp", 'wb') as f:
            f.write(pdf_data)
        pdf = open("pdf.tmp", "rb")

        # Add Pages obj and EOF mark
        # if root pages object exist, pass
        # deal with single missing pages object
        if single_pages_obj_missed or multi_pages_obj_missed:
            inds_str = ["{0} 0 R".format(i) for i in top_pages_obj_no]
            kids_str = "[{0}]".format(" ".join(inds_str))
            pages_str = "{0} 0 obj\r<<\r/Type /Pages\r/Kids {1}\r/Count {2}\r>>\rendobj\r".format(
                root_pages_obj_no, kids_str, self.page_num)
            pdf_data += bytes(pages_str, "utf-8")
            pdf.close()
            with open("pdf.tmp", 'wb') as f:
                f.write(pdf_data)
            pdf = open("pdf.tmp", "rb")
        # deal with multiple missing pages objects
        if multi_pages_obj_missed:
            kids_dict = {i: [] for i in top_pages_obj_no}
            count_dict = {i: 0 for i in top_pages_obj_no}
            for tpon in top_pages_obj_no:
                kids_addr = fnd_all(pdf, bytes("/Parent {0} 0 R".format(tpon), "utf-8"))
                for kid in kids_addr:
                    ind = fnd_rvrs(pdf, b"obj", kid) - 4
                    addr = fnd_rvrs(pdf, b"\r", ind)
                    length = fnd(pdf, b" ", addr) - addr
                    pdf.seek(addr)
                    [ind] = struct.unpack(str(length) + "s", pdf.read(length))
                    kids_dict[tpon].append(int(ind))
                    type_addr = fnd(pdf, b"/Type", addr) + 5
                    tmp_addr = fnd(pdf, b"/", type_addr) + 1
                    pdf.seek(tmp_addr)
                    [_type] = struct.unpack("5s", pdf.read(5))
                    if _type == b"Pages":
                        # A nested Pages node contributes its own /Count;
                        # read digits up to the next delimiter.
                        cnt_addr = fnd(pdf, b"/Count ", addr) + 7
                        pdf.seek(cnt_addr)
                        [_str] = struct.unpack("1s", pdf.read(1))
                        cnt_len = 0
                        while _str not in [b" ", b"\r", b"/"]:
                            cnt_len += 1
                            pdf.seek(cnt_addr + cnt_len)
                            [_str] = struct.unpack("1s", pdf.read(1))
                        pdf.seek(cnt_addr)
                        [cnt] = struct.unpack(str(cnt_len) + "s", pdf.read(cnt_len))
                        count_dict[tpon] += int(cnt)
                    else:  # _type == b"Page"
                        count_dict[tpon] += 1
                kids_no_str = ["{0} 0 R".format(i) for i in kids_dict[tpon]]
                kids_str = "[{0}]".format(" ".join(kids_no_str))
                pages_str = "{0} 0 obj\r<<\r/Type /Pages\r/Kids {1}\r/Count {2}\r>>\rendobj\r".format(
                    tpon, kids_str, count_dict[tpon])
                pdf_data += bytes(pages_str, "utf-8")
        pdf_data += bytes("\n%%EOF\r", "utf-8")
        # Parsing is finished; release the handle before the final rewrite
        # and before mutool reads the file.
        pdf.close()
        with open("pdf.tmp", 'wb') as f:
            f.write(pdf_data)

        # Use mutool to repair xref
        try:
            check_output(["mutool", "clean", "pdf.tmp", "pdf_toc.pdf"], stderr=STDOUT)
        except CalledProcessError as e:
            print(e.output.decode("utf-8"))
            raise SystemExit("Command mutool returned non-zero exit status " + str(e.returncode))

        # Add Outlines
        add_outlines(self.get_toc(), "pdf_toc.pdf", dest)
        os.remove("pdf.tmp")
        os.remove("pdf_toc.pdf")