def test_get_pdf_text_without_encrypted(tmp_path):
    """Exercise ``get_pdf_text`` on several fixture PDFs without a password.

    Covers four fixtures found under ``CWD``:

    * ``encrypted.pdf``        - must fail with a ``ShellException`` whose text
      is exactly the "Incorrect password" message with shell error code 1.
    * ``text-only.pdf``        - extracted text starts with a known sentence
      and contains a non-ASCII (Hebrew) word.
    * ``text-with-images.pdf`` - extracted text starts with a known title.
    * ``scanned.pdf``          - extracted text is a single form feed.

    Output text files are written under pytest's ``tmp_path``.
    """
    from ReadPDFFileV2 import get_pdf_text

    # The encrypted fixture must be rejected when no password is involved.
    try:
        get_pdf_text(f'{CWD}/encrypted.pdf', f'{tmp_path}/encrypted.txt')
    except ShellException as err:
        assert 'Command Line Error: Incorrect password\nShell error code: 1' == str(err)
    else:
        # Reaching this branch means the call unexpectedly succeeded.
        raise Exception("Incorrect password exception should've been thrown")

    # Plain-text document: check the leading sentence and the non-ASCII word.
    plain_text = get_pdf_text(f'{CWD}/text-only.pdf', f'{tmp_path}/text-only.txt')
    hebrew_word = "עברית"
    assert hebrew_word in plain_text
    assert plain_text.startswith('This is a pdf document with a text line within it.')

    # Document mixing text and images: only the leading title is checked.
    mixed_text = get_pdf_text(f'{CWD}/text-with-images.pdf', f'{tmp_path}/text-with-images.txt')
    title = 'Create an ETD Using Adobe Acrobat'
    assert mixed_text.startswith(title)

    # Scanned document: the whole result must equal a lone form feed.
    scanned_text = get_pdf_text(f'{CWD}/scanned.pdf', f'{tmp_path}/scanned.txt')
    form_feed = '\x0c'
    assert form_feed == scanned_text
def test_get_pdf_text_without_encrypted(tmp_path):
    """Exercise ``get_pdf_text`` on several fixture PDFs without a password.

    Checks, in order:

    * ``encrypted.pdf`` raises a ``ShellException`` mentioning
      "Incorrect password" and "error code: 1".
    * ``warning_trigger.pdf`` is extracted and contains a known phrase.
    * ``text-only.pdf`` starts with a known sentence and contains a
      non-ASCII (Hebrew) word.
    * ``text-with-images.pdf`` starts with a known title.
    * ``scanned.pdf`` yields exactly one form feed.

    Output text files are written under pytest's ``tmp_path``.
    """
    from ReadPDFFileV2 import get_pdf_text

    # An error must be raised for the encrypted fixture.
    try:
        get_pdf_text(f'{CWD}/encrypted.pdf', f'{tmp_path}/encrypted.txt')
    except ShellException as err:
        message = str(err)
        assert 'Incorrect password' in message
        assert 'error code: 1' in message
    else:
        # Reaching this branch means the call unexpectedly succeeded.
        raise Exception("Incorrect password exception should've been thrown")

    # NOTE(review): presumably this fixture makes the underlying tool emit
    # warnings; the test only verifies that extraction still returns the text.
    warning_text = get_pdf_text(f'{CWD}/warning_trigger.pdf', f'{tmp_path}/warning_trigger.txt')
    assert 'Riu Plaza Berlin' in warning_text

    # Regular extraction: plain-text document.
    plain_text = get_pdf_text(f'{CWD}/text-only.pdf', f'{tmp_path}/text-only.txt')
    hebrew_word = "עברית"
    assert hebrew_word in plain_text
    assert plain_text.startswith('This is a pdf document with a text line within it.')

    # Regular extraction: document mixing text and images.
    mixed_text = get_pdf_text(f'{CWD}/text-with-images.pdf', f'{tmp_path}/text-with-images.txt')
    title = 'Create an ETD Using Adobe Acrobat'
    assert mixed_text.startswith(title)

    # Scanned document: the whole result must equal a lone form feed.
    scanned_text = get_pdf_text(f'{CWD}/scanned.pdf', f'{tmp_path}/scanned.txt')
    form_feed = '\x0c'
    assert form_feed == scanned_text
def test_get_pdf_text_with_encrypted(mocker, tmp_path):
    """Check that ``get_pdf_text`` reads the encrypted fixture when a password is supplied.

    ``demisto.args`` is patched to return a ``userPassword`` entry before
    ``get_pdf_text`` is imported and called; the extracted text must start
    with the known opening of the sample document.
    """
    # Patch first, import afterwards - same ordering as the setup requires.
    mocker.patch.object(demisto, 'args', return_value={'userPassword': '******'})
    from ReadPDFFileV2 import get_pdf_text

    source_pdf = f'{CWD}/encrypted.pdf'
    output_txt = f'{tmp_path}/encrypted.txt'
    extracted = get_pdf_text(source_pdf, output_txt)

    # Opening of the sample document, split across lines for readability.
    expected_prefix = (
        "XSL FO Sample Copyright © 2002-2005 Antenna House, Inc. All rights reserved.\n\n"
        "Links in PDF\nPDF link is classified into two parts, link to the specified position in the PDF "
        "document, and link to the external document.\n"
        "The internal-destination property of fo:basic-link indicates to link to the position in the same"
        " document. The externaldestination property indicates to link to external document. "
        "Below shows the example.\n\nExample of a link to internal destination\nRefer to Purchasing "
        "Assistance to get more information.\nExample of a link to external destination\nRefer to Purchasing "
        "Assistance to get more information."
    )
    assert extracted.startswith(expected_prefix)
def test_get_pdf_text_with_encrypted(tmp_path):
    """Decrypt the encrypted fixture, then check the text extracted from it.

    ``decrypt_pdf_file`` writes a decrypted copy of ``encrypted.pdf`` next to
    the fixtures (under ``CWD``); ``get_pdf_text`` is then run on that copy
    and the result must start with the known opening of the sample document.

    The decrypted copy is removed in a ``finally`` clause so it never lingers
    in the fixtures directory, even when decryption or extraction raises.
    """
    from ReadPDFFileV2 import get_pdf_text, decrypt_pdf_file

    file_path = f'{CWD}/encrypted.pdf'
    dec_file_path = f'{CWD}/decrypted.pdf'
    expected = (
        "XSL FO Sample Copyright © 2002-2005 Antenna House, Inc. All rights reserved.\n\n"
        "Links in PDF\nPDF link is classified into two parts, link to the specified position in the PDF "
        "document, and link to the external document.\n"
        "The internal-destination property of fo:basic-link indicates to link to the position in the same"
        " document. The externaldestination property indicates to link to external document. "
        "Below shows the example.\n\nExample of a link to internal destination\nRefer to Purchasing "
        "Assistance to get more information.\nExample of a link to external destination\nRefer to Purchasing "
        "Assistance to get more information."
    )

    try:
        decrypt_pdf_file(file_path, '1234', dec_file_path)
        text = get_pdf_text(dec_file_path, f'{tmp_path}/encrypted.txt')
    finally:
        # Always clean up the temporary decrypted PDF, success or failure.
        if os.path.exists(dec_file_path):
            os.remove(dec_file_path)

    assert text.startswith(expected)