Beispiel #1
0
 def test_for_loop(self):
     """Join the pages produced by a ``for`` loop and look for both words."""
     document = pdftotext.PDF(get_file("two_page.pdf"))
     collected = ""
     # The explicit loop is the point of this test: it exercises iteration.
     for page_text in document:
         collected += page_text
     for expected in ("one", "two"):
         self.assertIn(expected, collected)
Beispiel #2
0
 def test_read_landscape_0(self):
     """The first page of landscape_0.pdf must contain a, b, c and d."""
     document = pdftotext.PDF(get_file("landscape_0.pdf"))
     first_page = document[0]
     # Checked in the same order as before; the first miss fails the test.
     for letter in ("a", "b", "c", "d"):
         self.assertIn(letter, first_page)
Beispiel #3
0
 def test_read_portrait(self):
     """The first page of portrait.pdf must contain a, b, c and d."""
     first_page = pdftotext.PDF(get_file("portrait.pdf"))[0]
     # Iterating the string yields the four single-letter needles in order.
     for letter in "abcd":
         self.assertIn(letter, first_page)
Beispiel #4
0
 def test_read_landscape_90(self):
     """Read landscape_90.pdf with raw layout and latin1 encoding.

     The first page must contain each of the letters a, b, c and d.
     """
     document = pdftotext.PDF(
         get_file("landscape_90.pdf"),
         layout=pdftotext.TextLayout.raw,
         encoding='latin1',
     )
     first_page = document[0]
     for letter in ("a", "b", "c", "d"):
         self.assertIn(letter, first_page)
Beispiel #5
0
 def test_raw_vs_not(self):
     """Default and raw layouts must give different text for table.pdf."""
     name = "table.pdf"
     default_doc = pdftotext.PDF(get_file(name))
     raw_doc = pdftotext.PDF(
         get_file(name),
         layout=pdftotext.TextLayout.raw,
     )
     default_text = default_doc[0]
     raw_text = raw_doc[0]
     self.assertNotEqual(default_text, raw_text)
Beispiel #6
0
 def test_iter(self):
     """``list()`` over the PDF should give the two pages, in order."""
     document = pdftotext.PDF(get_file('two_page.pdf'))
     # Materialise through list() as before, then trim whitespace per page.
     pages = list(document)
     stripped = [text.strip() for text in pages]
     self.assertEqual(stripped, ['one.', 'two.'])
Beispiel #7
0
 def test_read(self):
     """The first page of abcde.pdf must contain the string "abcde"."""
     document = pdftotext.PDF(get_file("abcde.pdf"))
     first_page = document[0]
     self.assertIn("abcde", first_page)
Beispiel #8
0
 def test_init_invalid_pdf_file(self):
     """Constructing a PDF from a bytes stream holding b"wrong" must raise.

     The expected exception type is ``TypeError``.
     """
     not_a_pdf = io.BytesIO(b"wrong")
     # Callable form of assertRaises: invokes pdftotext.PDF(not_a_pdf).
     self.assertRaises(TypeError, pdftotext.PDF, not_a_pdf)
Beispiel #9
0
 def test_init_corrupt_pdf_file(self):
     """Constructing a PDF from corrupt.pdf must raise ``ValueError``."""
     corrupt_source = get_file("corrupt.pdf")
     # Callable form of assertRaises: invokes pdftotext.PDF(corrupt_source).
     self.assertRaises(ValueError, pdftotext.PDF, corrupt_source)
Beispiel #10
0
 def test_double_init_success(self):
     """Calling ``__init__`` a second time on a PDF object must succeed.

     After re-initialising with blank.pdf the object must report length 1.
     """
     document = pdftotext.PDF(get_file("abcde.pdf"))
     # Deliberate direct __init__ call: re-initialisation is under test.
     document.__init__(get_file("blank.pdf"))
     page_count = len(document)
     self.assertEqual(page_count, 1)
Beispiel #11
0
 def test_init_file_in_text_mode(self):
     """Constructing a PDF from a text stream must raise ``TypeError``."""
     text_stream = io.StringIO(u"wrong")
     # Callable form of assertRaises: invokes pdftotext.PDF(text_stream).
     self.assertRaises(TypeError, pdftotext.PDF, text_stream)
Beispiel #12
0
 def test_read_corrupt_page(self):
     """Opening corrupt_page.pdf and reading page 0 must raise IndexError.

     Both the construction and the page access stay inside the
     ``assertRaises`` block, so an ``IndexError`` from either step passes.
     """
     with self.assertRaises(IndexError):
         document = pdftotext.PDF(get_file("corrupt_page.pdf"))
         document[0]
Beispiel #13
0
 def test_pdf_read_wrong_arg_type(self):
     """Subscripting a PDF with a string key must raise ``TypeError``."""
     document = pdftotext.PDF(get_file("blank.pdf"))
     bad_key = "wrong"
     with self.assertRaises(TypeError):
         document[bad_key]
Beispiel #14
0
 def test_read_page_two(self):
     """Index 1 of two_page.pdf must return text that contains "two"."""
     document = pdftotext.PDF(get_file("two_page.pdf"))
     second_page = document[1]
     self.assertIn("two", second_page)
Beispiel #15
0
 def test_list_last_element(self):
     """Index -1 of two_page.pdf must return text that contains "two"."""
     document = pdftotext.PDF(get_file("two_page.pdf"))
     final_page = document[-1]
     self.assertIn("two", final_page)
Beispiel #16
0
 def test_list_invalid_element(self):
     """Index 2 of two_page.pdf must raise ``IndexError``."""
     document = pdftotext.PDF(get_file("two_page.pdf"))
     out_of_range = 2
     with self.assertRaises(IndexError):
         document[out_of_range]
Beispiel #17
0
 def test_list_length(self):
     """``len()`` of the PDF built from two_page.pdf must be 2."""
     document = pdftotext.PDF(get_file("two_page.pdf"))
     page_count = len(document)
     self.assertEqual(page_count, 2)
Beispiel #18
0
 def test_length_one(self):
     """``len()`` of the PDF built from blank.pdf must be 1."""
     document = pdftotext.PDF(get_file("blank.pdf"))
     page_count = len(document)
     self.assertEqual(page_count, 1)
Beispiel #19
0
 def test_raw_is_not_default(self):
     """Omitting ``layout`` must match ``TextLayout.physical`` for page 0.

     Both documents are built from table.pdf and their first pages are
     compared for equality.
     """
     name = "table.pdf"
     implicit_doc = pdftotext.PDF(get_file(name))
     physical_doc = pdftotext.PDF(
         get_file(name),
         layout=pdftotext.TextLayout.physical,
     )
     implicit_text = implicit_doc[0]
     physical_text = physical_doc[0]
     self.assertEqual(implicit_text, physical_text)
Beispiel #20
0
 def test_pdf_read_invalid_page_number(self):
     """Index 100 of the PDF built from blank.pdf must raise IndexError."""
     document = pdftotext.PDF(get_file("blank.pdf"))
     far_out_of_range = 100
     with self.assertRaises(IndexError):
         document[far_out_of_range]