Python PdfFileReader.getDestinationPageNumber Examples

Programming Language: Python

Namespace/Package Name: PyPDF2

Class/Type: PdfFileReader

Method/Function: getDestinationPageNumber

Examples at hotexamples.com: 8

Python PdfFileReader.getDestinationPageNumber - 8 examples found. These are the top rated real world Python examples of PyPDF2.PdfFileReader.getDestinationPageNumber extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

PdfFileReader(30)

decrypt(30)

getPage(30)

getNumPages(30)

getDocumentInfo(30)

getFields(29)

getOutlines(21)

getFormTextFields(20)

getIsEncrypted(16)

getDestinationPageNumber(8)

getPageLayout(7)

getNamedDestinations(5)

_override_encryption(2)

close(2)

split(2)

get(2)

_flatten(1)

encrypt(1)

drawImage(1)

getNumOfPages(1)

addPage(1)

getNumPges(1)

decrypy(1)

add_font(1)

convert(1)

iter(1)

scale(1)

add_page(1)

Example #1

Show file

def test_get_destination_age_number():
    """Smoke-test page lookup for the outline entries of the sample PDF.

    Opens ``pdflatex-outline.pdf`` from ``RESOURCE_ROOT`` and asks the reader
    for the destination page number of every outline entry that is not a
    list. Nothing is asserted on the returned values; the test only checks
    that no lookup raises.
    """
    pdf_path = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")
    reader = PdfFileReader(pdf_path)
    # List entries are skipped; only the remaining entries are resolved.
    resolvable_entries = (
        entry for entry in reader.getOutlines() if not isinstance(entry, list)
    )
    for entry in resolvable_entries:
        reader.getDestinationPageNumber(entry)

Example #2

Show file

File: pdf_picker.py Project: ant1k9/pdf-picker

 def __choose(self, reader: PdfFileReader, outlines: list, idx: int):
     """Copy the pages of the chapter at ``outlines[idx]`` into the writer.

     The chapter length comes from ``__chapter_pages`` and its first page
     from the reader's destination lookup. Every copied page is added to
     ``self._writer`` and counted in ``self._written_pages``.
     """
     selected_outline = outlines[idx]
     page_count = self.__chapter_pages(reader, outlines, idx)
     first_page = reader.getDestinationPageNumber(selected_outline)
     for offset in range(page_count):
         self._writer.addPage(reader.getPage(first_page + offset))
         self._written_pages += 1

Example #3

Show file

File: pdf_picker.py Project: ant1k9/pdf-picker

    def __chapter_pages(self, reader: PdfFileReader, outlines: list,
                        idx: int) -> int:
        """Return how many pages the chapter at ``outlines[idx]`` spans.

        Resolution order:

        1. Distance to the first ``Destination`` found after ``idx`` in
           ``outlines``.
        2. Otherwise, when the level stored in the last ``state_list`` entry
           is not ``START_LEVEL``, the positive distance reported by
           ``__pages_to_next_upper_chapter``.
        3. Otherwise, the pages left up to ``reader.numPages``.
        """
        start_page = reader.getDestinationPageNumber(outlines[idx])

        # First Destination after idx in this list, if there is one.
        following_destination = next(
            (outlines[pos] for pos in range(idx + 1, len(outlines))
             if isinstance(outlines[pos], Destination)),
            None,
        )
        if following_destination is not None:
            return (reader.getDestinationPageNumber(following_destination)
                    - start_page)

        level, *_ = self.state_list[-1]
        if level == START_LEVEL:
            return reader.numPages - start_page

        # Nested level: try to stop at the next chapter of an upper level.
        distance_to_upper = self.__pages_to_next_upper_chapter(
            reader, start_page, level)
        if distance_to_upper > 0:
            return distance_to_upper
        return reader.numPages - start_page

Example #4

Show file

def split_by_sections(path):
    """Split a PDF into one file per first-level bookmark (outline) entry.

    Only the first outline level is used. Outline entries that are lists
    (nested levels) are skipped, because they cannot be indexed by
    ``'/Title'`` and would otherwise raise ``TypeError``.

    Each section starts on the page its bookmark points to and ends on the
    page before the next first-level bookmark; the last section runs to the
    end of the document. Output files are named
    ``<source name>-<section number>-<title>.pdf`` and are opened by relative
    name, so they land in the current working directory.

    Args:
        path (str): Path of the PDF file to split.
    """
    # File name without directory and extension; prefix for every output file.
    filename = os.path.splitext(os.path.basename(path))[0]

    pdf_reader = PdfFileReader(path)
    # Keep first-level entries only; nested levels show up as lists.
    top_level = [
        entry for entry in pdf_reader.outlines if not isinstance(entry, list)
    ]

    titles = [entry['/Title'] for entry in top_level]
    # Page index on which each section starts, as reported by the reader.
    first_pages = [
        pdf_reader.getDestinationPageNumber(entry) for entry in top_level
    ]
    # Exclusive end of each section: the start of the next section, or the
    # page count of the document for the last one.
    stop_pages = first_pages[1:] + [pdf_reader.numPages]

    sections = zip(titles, first_pages, stop_pages)
    for idx, (title, first_page, stop_page) in enumerate(sections):
        # Each section is written to its own PDF.
        pdf_writer = PdfFileWriter()
        for page_index in range(first_page, stop_page):
            pdf_writer.addPage(pdf_reader.getPage(page_index))
        output_filename = f'{filename}-{idx + 1}-{title}.pdf'
        with open(output_filename, 'wb') as out:
            pdf_writer.write(out)

Example #5

Show file

File: pdf_picker.py Project: ant1k9/pdf-picker

 def __pages_to_next_upper_chapter(self, reader: PdfFileReader,
                                   current_page: int,
                                   current_level: int) -> int:
     """Return the distance in pages to the next chapter of an upper level.

     Walks ``self.state_list`` from the most recent entry backwards. For
     each entry whose level is lower than ``current_level``, the outlines
     after that entry's index are scanned for a ``Destination`` located
     after ``current_page``. The first positive distance found is returned;
     0 means no such chapter was found.
     """
     for level, level_outlines, position in reversed(self.state_list):
         if not level < current_level:
             continue
         for candidate in level_outlines[(position + 1):]:
             if not isinstance(candidate, Destination):
                 continue
             distance = (reader.getDestinationPageNumber(candidate)
                         - current_page)
             if distance > 0:
                 return distance
     return 0

Example #6

Show file

File: split_pdf_into_subchapters.py Project: p-severin/pdf_management

# Everything that uses pdf_content stays inside the with-block, so the source
# stream is open while pages are copied and is closed on exit even if a step
# raises.
with open(file_to_read, 'rb') as file_stream:
    pdf_content = PdfFileReader(file_stream)
    outlines = pdf_content.getOutlines()

    # A chapter is a Destination immediately followed by a list (its nested
    # entries). Pairing every item with its successor stops one short of the
    # end, so a trailing Destination no longer triggers an IndexError.
    for item, following in zip(outlines, outlines[1:]):
        if isinstance(item, generic.Destination) and isinstance(following, list):
            # Build a file-name-friendly title: no slashes, no spaces.
            title = item.title
            title = '_'.join(title.strip().replace('/', '_').split(' '))
            max_number_of_characters = 100
            title = title[:max_number_of_characters]
            # Put the chapter's own destination in front of its nested
            # entries so the chapter's first page is part of the content.
            following.insert(0, item)
            content = following
            chapters.append((title, content))

    for chapter in chapters:
        chapter_title, chapter_content = chapter[0], chapter[1]

        subchapters = flatten(chapter_content)
        file_to_write = dir_to_save_chapters / f'{chapter_title}.pdf'

        pdf_writer = PdfFileWriter()
        start_page = pdf_content.getDestinationPageNumber(subchapters[0])
        # NOTE(review): end_page is the page the last subchapter points to,
        # so later pages of that subchapter are not copied - confirm this is
        # intended.
        end_page = pdf_content.getDestinationPageNumber(subchapters[-1])

        for page_index in range(start_page, end_page + 1):
            pdf_writer.addPage(pdf_content.getPage(page_index))
        with open(file_to_write, 'wb') as f:
            pdf_writer.write(f)

Example #7

Show file

File: searchandmove_file.py Project: guaigua/search_and_move_file

with open(srcfile, "rb") as f:
    pdf = PdfFileReader(f)
    # Try bookmarks without child
    try:
        bookmarks = pdf.getOutlines()
    except Exception:
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are no longer reported as a bookmark problem.
        upload = False
        errormsg = "this file contains bookmarks with child"
        error_log(filename, upload, errormsg)
        sys.exit()
    # Read Bookmarks
    if bookmarks:
        for b in bookmarks:
            invID = b['/Title']
            # Only short titles that start with a word character are treated
            # as investor IDs. The pattern is a raw string so the backslash
            # is not parsed as a (deprecated) invalid string escape.
            if len(invID) < 22 and re.match(r'\w', invID):
                i = pdf.getDestinationPageNumber(b)
                # Search InvID in database
                # Connect to db
                db = client.iportalDevDB19
                # Connect to collection
                collection = db.investors
                collection2 = db.fundinvestors
                rinvID = ''
                fundID = ''
                # The last matching fund-investor record wins.
                for y in collection2.find({"invID": invID}):
                    fundID = str(y['fundID'])
                    print(fundID)
                # Look up the investor record only when a fund was found.
                if fundID:
                    for x in collection.find({"invID": invID}):
                        rinvID = str(x['_id'])

Example #8

Show file

File: pdf_picker.py Project: ant1k9/pdf-picker

 def __is_the_end(self, reader: PdfFileReader, outlines: list,
                  idx: int) -> bool:
     """Tell whether the chapter at ``outlines[idx]`` runs to the last page.

     True when the length computed by ``__chapter_pages`` equals the number
     of pages between the chapter's destination page and ``reader.numPages``.
     """
     total_pages = reader.numPages
     chapter_start = reader.getDestinationPageNumber(outlines[idx])
     pages_until_end = total_pages - chapter_start
     chapter_length = self.__chapter_pages(reader, outlines, idx)
     return chapter_length == pages_until_end