from PyPDF2 import PdfFileReader, PdfFileWriter

# Create a new PDF file with five pages.
# Writing raw byte strings such as b'This is page 1' to a file does not
# produce a parseable PDF (there is no header, page tree, or trailer), so
# the document is built with PdfFileWriter instead.
pdf_writer = PdfFileWriter()
for _ in range(5):
    # 612 x 792 points is US Letter; the pages are intentionally blank.
    pdf_writer.addBlankPage(width=612, height=792)

with open('sample.pdf', 'wb') as f:
    pdf_writer.write(f)

# Open the PDF file using PdfFileReader. The context manager closes the
# handle even if parsing fails; the page count is read while it is open.
with open('sample.pdf', 'rb') as f:
    pdf = PdfFileReader(f)
    # get the total number of pages
    total_pages = pdf.getNumPages()

# print the total number of pages
print("Total pages: ", total_pages)
Total pages: 5
from PyPDF2 import PdfFileReader

# Input PDF file we want to read. Using a context manager guarantees the
# file handle is closed even if parsing or text extraction raises.
with open('document.pdf', 'rb') as pdf_file:
    # create PDF reader object
    pdf_reader = PdfFileReader(pdf_file)

    # get number of pages in PDF document
    num_pages = pdf_reader.getNumPages()

    # Loop through each page and print its contents. Extraction happens
    # inside the `with` block because the reader needs the open file.
    for page in range(num_pages):
        page_obj = pdf_reader.getPage(page)
        print("Page ", page + 1, ":", page_obj.extractText())
Page 1 : This is page 1 content Page 2 : This is page 2 content Page 3 : This is page 3 content Page 4 : This is page 4 content Page 5 : This is page 5 content
From the imports, we can determine that the library used in the above examples is PyPDF2.