Ejemplo n.º 1
0
 def test_counts_correct_amount_of_paragraphs_for_complex_56(self):
     """Zipping complex_PDF_5 with complex_PDF_6 must yield 69 entries."""
     # Each fixture goes through sanitize_text -> build_paragraphs first.
     first_paragraphs = interleave.build_paragraphs(
         interleave.sanitize_text(self.complex_PDF_5))
     second_paragraphs = interleave.build_paragraphs(
         interleave.sanitize_text(self.complex_PDF_6))

     zipped = interleave.zip_sentences(first_paragraphs, second_paragraphs)

     self.assertEqual(69, len(zipped))
Ejemplo n.º 2
0
    def test_zip_sentences_to_tuple(self):
        """Zipping short_text with itself must equal processed_text."""
        # Both inputs are the same fixture, each prefixed with two newlines.
        first_text = '\n\n' + self.short_text
        second_text = '\n\n' + self.short_text

        expected = self.processed_text
        first_paragraphs = interleave.build_paragraphs(
            interleave.sanitize_text(first_text))
        second_paragraphs = interleave.build_paragraphs(
            interleave.sanitize_text(second_text))

        self.assertEqual(
            expected,
            interleave.zip_sentences(first_paragraphs, second_paragraphs))
Ejemplo n.º 3
0
 def test_edge_valid_nonroman_string(self):
     """Sanitized edge_complex_7 (after two newlines) must build 4 items."""
     raw_text = '\n\n' + self.edge_complex_7
     sanitized = interleave.sanitize_text(raw_text)
     paragraphs = interleave.build_paragraphs(sanitized)
     self.assertEqual(4, len(paragraphs))
Ejemplo n.º 4
0
 def test_edge_marks_bad_paragraph_parse(self):
     """edge_bad_parse must build 3 items, one being '2. PARSE ERROR'."""
     raw_text = '\n\n' + self.edge_bad_parse
     sanitized = interleave.sanitize_text(raw_text)
     paragraphs = interleave.build_paragraphs(sanitized)

     self.assertEqual(3, len(paragraphs))
     # The marker must appear as an element of the returned collection.
     self.assertIn('2. PARSE ERROR', paragraphs)
Ejemplo n.º 5
0
 def test_edge_case_isolate_first_paragraph(self):
     """Sanitized edge_first_paragraph must build exactly 1 item."""
     # Unlike sibling edge-case tests, no leading newlines are prepended.
     sanitized = interleave.sanitize_text(self.edge_first_paragraph)
     paragraphs = interleave.build_paragraphs(sanitized)
     self.assertEqual(1, len(paragraphs))
Ejemplo n.º 6
0
 def test_edge_case_missing_paragraphs(self):
     """Sanitized edge_missing_paragraphs must build seven paragraphs."""
     raw_text = '\n\n' + self.edge_missing_paragraphs
     sanitized = interleave.sanitize_text(raw_text)
     paragraphs = interleave.build_paragraphs(sanitized)
     self.assertEqual(7, len(paragraphs))
Ejemplo n.º 7
0
 def test_edge_case_line_starts_with_numeric_sentence_end(self):
     """Sanitized edge_numbers must build seven paragraphs."""
     raw_text = '\n\n' + self.edge_numbers
     sanitized = interleave.sanitize_text(raw_text)
     paragraphs = interleave.build_paragraphs(sanitized)
     self.assertEqual(7, len(paragraphs))
Ejemplo n.º 8
0
 def test_builds_paragraphs_correctly(self):
     """build_paragraphs on raw short_text must equal split_simple_text."""
     expected = self.split_simple_text
     # The input goes straight to build_paragraphs; sanitize_text is not
     # called in this test.
     actual = interleave.build_paragraphs('\n\n' + self.short_text)
     self.assertEqual(expected, actual)
Ejemplo n.º 9
0
 def test_counts_up_to_1000_paragraphs(self):
     """Sanitized thousand_paragraphs must build exactly 1000 items."""
     sanitized = interleave.sanitize_text(self.thousand_paragraphs)
     paragraphs = interleave.build_paragraphs(sanitized)
     self.assertEqual(1000, len(paragraphs))
Ejemplo n.º 10
0
 def test_splits_multipage_paragraphs(self):
     """Sanitized multipage_text must build into split_multipage_text."""
     raw_text = '\n\n' + self.multipage_text
     sanitized = interleave.sanitize_text(raw_text)
     paragraphs = interleave.build_paragraphs(sanitized)
     self.assertEqual(self.split_multipage_text, paragraphs)
Ejemplo n.º 11
0
 def test_splits_short_sentences(self):
     """Sanitized short_text must build into split_simple_text."""
     raw_text = '\n\n' + self.short_text
     sanitized = interleave.sanitize_text(raw_text)
     paragraphs = interleave.build_paragraphs(sanitized)
     self.assertEqual(self.split_simple_text, paragraphs)