Esempio n. 1
0
def analyze_pdf(path):
    """
    Extracts author, title and creation year from the document info of a pdf.

    This is a best-effort helper: any ordinary failure (missing or unreadable
    file, a pdf without a document-info dictionary, a date that cannot be
    parsed, ...) yields ``None`` instead of an exception.

    :param path: File path of the pdf to inspect
    :return: A tuple of the form (author, title, year, path, 'pdf'), or None
        when the metadata could not be read
    """
    try:
        with open(path, 'rb') as f:
            metadata = PdfFileReader(f).getDocumentInfo()
            # Reduce every key to its letters only (e.g. '/Author' becomes
            # 'Author') so the plain-name lookups below can find it.
            metadata = {re.sub(r'[^A-Za-z]+', '', k): v for k, v in metadata.items()}
            author = metadata.get('Author')
            title = metadata.get('Title')
            date = metadata.get('CreationDate')
            # NOTE(review): get_year_from_date_string is defined elsewhere;
            # presumably it copes with a missing (None) date - confirm.
            year = get_year_from_date_string(date)
        return (author, title, year, path, 'pdf')
    except Exception:
        # Deliberately broad so one bad file cannot abort the caller, but not
        # a bare ``except``: KeyboardInterrupt and SystemExit must propagate.
        return None
Esempio n. 2
0
def _get_pdf_page_dimensions(pdf_file_path, page_no):
    """
    Returns the dimensions of a single pdf page with its rotation taken into account.

    The page's media box supplies the raw width and height. The page's '/Rotate' entry is handed to
    utils.is_horizontal_orientation; when that helper returns a falsy value the two numbers are swapped.
    NOTE(review): which rotation values count as "horizontal" is decided by that helper - confirm there.
    :param pdf_file_path: File path of the input pdf
    :param page_no: Page no whose dimensions are returned
    :return: A tuple of the form (width, height)
    """
    with open(pdf_file_path, 'rb') as stream:
        # The page is read while the file is still open.
        page = PdfFileReader(stream).getPage(page_no)
        box = page.mediaBox
        keep_native_order = utils.is_horizontal_orientation(page.get('/Rotate'))

        if keep_native_order:
            return box.getWidth(), box.getHeight()

        # Otherwise report the media box height as the width and vice versa.
        return box.getHeight(), box.getWidth()