def test_disallowed_replace_with(self):
    """Rejected replace_with modes must raise InvalidReplaceWith."""
    subject = Filth()
    # Both modes are exercised against the same Filth instance.
    for mode in ('surrogate', 'something_invalid'):
        with self.assertRaises(InvalidReplaceWith):
            subject.replace_with(mode)
def test_nonoverlapping_filth(self):
    """Merging filths whose spans do not overlap raises FilthMergeError."""
    leading = Filth(beg=0, end=3, text="the")
    trailing = Filth(beg=4, end=7, text="end")
    # The merge must be refused whichever filth initiates it.
    for receiver, argument in ((leading, trailing), (trailing, leading)):
        with self.assertRaises(FilthMergeError):
            receiver.merge(argument)
def test_invalid_merge_documents(self):
    """Filths with different document names cannot be merged."""
    in_one = Filth(0, 2, text='aa', document_name='one')
    in_two = Filth(1, 2, text='a', document_name='two')
    # The spans overlap, so only the differing document_name separates them;
    # the merge must be refused in both directions.
    for receiver, argument in ((in_one, in_two), (in_two, in_one)):
        with self.assertRaises(FilthMergeError):
            receiver.merge(argument)
def test_merged_to_string(self):
    """Check the repr of a MergedFilth built from two filths."""
    # The subclass name shows up in the expected repr below, so it must
    # stay spelled exactly "TestFilth".
    class TestFilth(Filth):
        type = 'test_filth'

    wide = TestFilth(0, 2, 'ab')
    narrow = Filth(1, 2, 'b')
    merged = MergedFilth(wide, narrow)

    expected = (
        "<MergedFilth filths=[<TestFilth text='ab'>, <Filth text='b'>]>"
    )
    self.assertEqual(merged.__repr__(), expected)
def test_equality(self):
    """Exercise Filth ``==`` and ``!=`` over span, text, document and detector."""

    def hello(**overrides):
        # Baseline filth covering 0-5 with text 'hello'; keyword overrides
        # replace or extend the baseline constructor arguments.
        arguments = dict(beg=0, end=5, text='hello')
        arguments.update(overrides)
        return Filth(**arguments)

    # Identical filths are equal, and supplying a match object does not
    # break equality with a filth built without one.
    self.assertTrue(hello() == hello())
    self.assertTrue(hello() == hello(match=re.match('123', '1234')))

    # A difference in beg, end or text makes filths unequal.
    self.assertTrue(hello() != hello(beg=1))
    self.assertTrue(hello() != hello(end=6))
    self.assertTrue(hello() != hello(text='hellou'))

    # document_name takes part in the comparison, including when only one
    # side sets it.
    self.assertTrue(
        hello(document_name='test') == hello(document_name='test'))
    self.assertTrue(hello() != hello(document_name='test'))
    self.assertTrue(hello(document_name='test') != hello())
    self.assertTrue(
        hello(document_name='test') != hello(document_name='another_test'))

    # detector_name takes part in the comparison in the same way.
    self.assertTrue(
        hello(detector_name='tester') == hello(detector_name='tester'))
    self.assertTrue(
        hello(detector_name='tester')
        != hello(detector_name='another_tester'))
    self.assertTrue(hello(detector_name='tester') != hello())
    self.assertTrue(hello() != hello(detector_name='tester'))

    # With both names set, changing either one breaks equality.
    self.assertTrue(
        hello(document_name='test', detector_name='tester')
        == hello(document_name='test', detector_name='tester'))
    self.assertTrue(
        hello(document_name='test', detector_name='tester')
        != hello(document_name='test', detector_name='another_tester'))
    self.assertTrue(
        hello(document_name='test', detector_name='tester')
        != hello(document_name='another_test', detector_name='tester'))
def test_filth_string(self):
    """Check the renderings from str(), __repr__() and _to_string()."""
    # A filth built without text renders with an empty text attribute
    # through every rendering entry point; each check uses a fresh instance.
    empty_rendering = "<Filth text=''>"
    self.assertEqual(str(Filth(beg=0, end=5)), empty_rendering)
    self.assertEqual(Filth(beg=0, end=5).__repr__(), empty_rendering)
    self.assertEqual(Filth(beg=0, end=5)._to_string(), empty_rendering)

    with_text = Filth(beg=0, end=5, text='hello')
    self.assertEqual(str(with_text), "<Filth text='hello'>")

    # document_name is included in the default rendering when it is set.
    with_document = Filth(
        beg=0, end=5, text='hello', document_name='hello.txt')
    self.assertEqual(
        str(with_document),
        "<Filth text='hello' document_name='hello.txt'>")

    # An explicit attribute list selects exactly what gets rendered.
    selectable = Filth(
        beg=0, end=5, text='hello', document_name='hello.txt')
    cases = (
        (['text'], "<Filth text='hello'>"),
        (['beg', 'end', 'text'], "<Filth beg=0 end=5 text='hello'>"),
        (['text', 'document_name'],
         "<Filth text='hello' document_name='hello.txt'>"),
    )
    for chosen, expected in cases:
        self.assertEqual(selectable._to_string(attributes=chosen), expected)
def test_merging(self):
    """Check the output of Scrubber._merge_filths for several inputs."""
    merge = scrubadub.Scrubber._merge_filths

    def spans(found):
        # Project each filth onto its (beg, end) pair.
        return [(item.beg, item.end) for item in found]

    def located_spans(found):
        # Project each filth onto (document_name, beg, end).
        return [(item.document_name, item.beg, item.end) for item in found]

    # Disjoint filths come back unmerged, in positional order.
    disjoint = [
        Filth(beg=6, end=7, text='a'),
        Filth(beg=2, end=3, text='a'),
        Filth(beg=0, end=1, text='a'),
        Filth(beg=4, end=5, text='a'),
    ]
    self.assertEqual(
        spans(merge(disjoint)),
        [(0, 1), (2, 3), (4, 5), (6, 7)])

    # Overlapping and touching filths collapse into a single span each.
    overlapping = [
        Filth(beg=7, end=8, text='a'),
        Filth(beg=0, end=1, text='a'),
        Filth(beg=4, end=5, text='a'),
        Filth(beg=0, end=3, text='aaa'),
        Filth(beg=5, end=8, text='aaa'),
    ]
    self.assertEqual(spans(merge(overlapping)), [(0, 3), (4, 8)])

    # Filths are merged per document and reported grouped by document name.
    two_documents = [
        Filth(beg=5, end=8, text='aaa', document_name='a'),
        Filth(beg=0, end=3, text='aaa', document_name='b'),
        Filth(beg=4, end=5, text='a', document_name='b'),
        Filth(beg=7, end=8, text='a', document_name='a'),
        Filth(beg=0, end=1, text='a', document_name='a'),
    ]
    self.assertEqual(
        located_spans(merge(two_documents)),
        [('a', 0, 1), ('a', 5, 8), ('b', 0, 3), ('b', 4, 5)])

    # A document_name of None forms its own group, reported first.
    with_unnamed = [
        Filth(beg=5, end=8, text='aaa', document_name=None),
        Filth(beg=0, end=3, text='aaa', document_name='b'),
        Filth(beg=4, end=5, text='a', document_name='b'),
        Filth(beg=7, end=8, text='a', document_name=None),
        Filth(beg=0, end=1, text='a', document_name='a'),
    ]
    self.assertEqual(
        located_spans(merge(with_unnamed)),
        [(None, 5, 8), ('a', 0, 1), ('b', 0, 3), ('b', 4, 5)])
def test_sorting(self):
    """Check the ordering produced by Scrubber._sort_filths."""
    sort = scrubadub.Scrubber._sort_filths

    def spans(found):
        # Project each filth onto its (beg, end) pair.
        return [(item.beg, item.end) for item in found]

    def located_spans(found):
        # Project each filth onto (document_name, beg, end).
        return [(item.document_name, item.beg, item.end) for item in found]

    # Disjoint filths are ordered by position.
    disjoint = [
        Filth(beg=6, end=7),
        Filth(beg=2, end=3),
        Filth(beg=0, end=1),
        Filth(beg=4, end=5),
    ]
    self.assertEqual(
        spans(sort(disjoint)),
        [(0, 1), (2, 3), (4, 5), (6, 7)])

    # Sorting does not merge; for an equal beg the longer filth comes first.
    overlapping = [
        Filth(beg=7, end=8),
        Filth(beg=0, end=1),
        Filth(beg=4, end=5),
        Filth(beg=0, end=3),
        Filth(beg=5, end=8),
    ]
    self.assertEqual(
        spans(sort(overlapping)),
        [(0, 3), (0, 1), (4, 5), (5, 8), (7, 8)])

    # Filths are grouped by document name, then ordered by position.
    two_documents = [
        Filth(beg=5, end=8, document_name='a'),
        Filth(beg=0, end=3, document_name='b'),
        Filth(beg=4, end=5, document_name='b'),
        Filth(beg=7, end=8, document_name='a'),
        Filth(beg=0, end=1, document_name='a'),
    ]
    self.assertEqual(
        located_spans(sort(two_documents)),
        [('a', 0, 1), ('a', 5, 8), ('a', 7, 8), ('b', 0, 3), ('b', 4, 5)])