def test_counts_correct_amount_of_paragraphs_for_complex_56(self):
    """Zip the paragraphs of complex_PDF_5 and complex_PDF_6; expect 69.

    Each fixture is sanitized and split into paragraphs on its own, then
    the two paragraph collections are handed to ``zip_sentences``.
    """
    paragraphs_5 = interleave.build_paragraphs(
        interleave.sanitize_text(self.complex_PDF_5))
    paragraphs_6 = interleave.build_paragraphs(
        interleave.sanitize_text(self.complex_PDF_6))

    zipped = interleave.zip_sentences(paragraphs_5, paragraphs_6)

    self.assertEqual(69, len(zipped))
def test_zip_sentences_to_tuple(self):
    """Zip two identical short texts and compare with processed_text.

    Both inputs are ``self.short_text`` prefixed with a blank-line
    separator; each is sanitized and split into paragraphs before being
    passed to ``zip_sentences``.
    """
    first_source = '\n\n' + self.short_text
    second_source = '\n\n' + self.short_text
    expected = self.processed_text

    first_paragraphs = interleave.build_paragraphs(
        interleave.sanitize_text(first_source))
    second_paragraphs = interleave.build_paragraphs(
        interleave.sanitize_text(second_source))
    zipped = interleave.zip_sentences(first_paragraphs, second_paragraphs)

    self.assertEqual(expected, zipped)
def test_edge_valid_nonroman_string(self):
    """Expect four paragraphs from the edge_complex_7 fixture.

    NOTE(review): the test name suggests the fixture holds non-Roman
    text; the fixture itself is defined elsewhere -- confirm there.
    """
    sanitized = interleave.sanitize_text('\n\n' + self.edge_complex_7)
    paragraphs = interleave.build_paragraphs(sanitized)

    self.assertEqual(4, len(paragraphs))
def test_edge_marks_bad_paragraph_parse(self):
    """Expect three paragraphs, one being the '2. PARSE ERROR' marker.

    The edge_bad_parse fixture is sanitized and split; the result must
    have length three and contain the marker string as a member.
    """
    sanitized = interleave.sanitize_text('\n\n' + self.edge_bad_parse)
    paragraphs = interleave.build_paragraphs(sanitized)

    self.assertEqual(3, len(paragraphs))
    # Membership check against the build_paragraphs result itself.
    self.assertIn('2. PARSE ERROR', paragraphs)
def test_edge_case_isolate_first_paragraph(self):
    """Expect exactly one paragraph from the edge_first_paragraph fixture.

    Unlike most sibling tests, the fixture is passed to ``sanitize_text``
    without a leading blank-line prefix.
    """
    sanitized = interleave.sanitize_text(self.edge_first_paragraph)
    paragraphs = interleave.build_paragraphs(sanitized)

    self.assertEqual(1, len(paragraphs))
def test_edge_case_missing_paragraphs(self):
    """Expect seven paragraphs from the edge_missing_paragraphs fixture."""
    prefixed_text = '\n\n' + self.edge_missing_paragraphs
    paragraphs = interleave.build_paragraphs(
        interleave.sanitize_text(prefixed_text))

    expected_count = 7  # Expect seven paragraphs
    self.assertEqual(expected_count, len(paragraphs))
def test_edge_case_line_starts_with_numeric_sentence_end(self):
    """Expect seven paragraphs from the edge_numbers fixture."""
    prefixed_text = '\n\n' + self.edge_numbers
    paragraphs = interleave.build_paragraphs(
        interleave.sanitize_text(prefixed_text))

    expected_count = 7  # Expect seven paragraphs
    self.assertEqual(expected_count, len(paragraphs))
def test_builds_paragraphs_correctly(self):
    """Compare build_paragraphs output for short_text to split_simple_text.

    The prefixed text is passed to ``build_paragraphs`` directly here,
    without going through ``sanitize_text`` first.
    """
    expected = self.split_simple_text
    raw_text = '\n\n' + self.short_text

    paragraphs = interleave.build_paragraphs(raw_text)

    self.assertEqual(expected, paragraphs)
def test_counts_up_to_1000_paragraphs(self):
    """Expect 1000 paragraphs from the thousand_paragraphs fixture."""
    sanitized = interleave.sanitize_text(self.thousand_paragraphs)
    paragraphs = interleave.build_paragraphs(sanitized)

    self.assertEqual(1000, len(paragraphs))
def test_splits_multipage_paragraphs(self):
    """Compare paragraphs built from multipage_text to split_multipage_text."""
    sanitized = interleave.sanitize_text('\n\n' + self.multipage_text)
    paragraphs = interleave.build_paragraphs(sanitized)

    expected = self.split_multipage_text
    self.assertEqual(expected, paragraphs)
def test_splits_short_sentences(self):
    """Compare paragraphs built from sanitized short_text to split_simple_text."""
    sanitized = interleave.sanitize_text('\n\n' + self.short_text)
    paragraphs = interleave.build_paragraphs(sanitized)

    expected = self.split_simple_text
    self.assertEqual(expected, paragraphs)