Example #1
0
 def test_unicode_normalisation(self):
     # Letters decorated with diacritical marks are paired with the plain
     # text that normalise_text is expected to return for them.
     cases = (
         ('yourseͩlf', 'yourself'),
         ('l̘oo͓kͩi̒ng f́o͍r a f%ckbuddy', 'looking for a fuckbuddy'),
         ('ẖaͣv̷e hot sxx', 'have hot sxx'),
     )
     for raw, expected in cases:
         self.assertEqual(normalise_text(raw), expected)
Example #2
0
 def test_should_not_substitute_numbers(self):
     # The expected value contains neither the digits '542' nor any letters
     # standing in for them; the '.' inside the word is gone as well.
     actual = normalise_text('si.ngles542')
     self.assertEqual(actual, 'singles')
Example #3
0
 def test_random_examples(self):
     # Each entry is (input, extra keyword arguments, expected output).
     # Entries are checked in the order listed, one call per entry.
     cases = [
         ('f u c k e r', {}, 'f****r'),
         ('*f**k*', {}, 'f**k'),
         ('sun-of-a-bitch!', {}, 'sunofabitch'),
         ('sfucks', {}, 'sfucks'),
         ('f*ck', {}, 'fck'),
         ('d*ck', {}, 'dck'),
         ('c*ck', {}, 'cck'),
         ('s3̒x', {}, 'sex'),
         ('sooَn', {}, 'soon'),
         ('Un̟beliٝev֮able anal puñisheͤr', {}, 'unbelievable anal punisher'),
         ('i\'m loּoki֒ng to f@ck righ̖t now', {}, 'im looking to fck right now'),
         ('are yͮou avͯai͟lable? se͆nd m͙e a quiͩck msg', {}, 'are you available send me a quick msg'),
         ('A̎lrite my bab̅y', {}, 'alrite my baby'),
         ('seͤnd me a f%ckfr̎iend', {}, 'send me a fuckfriend'),
         ('s̻o wet right n֥ow', {}, 'so wet right now'),
         ('T͢A֚LK Sͯ00N', {'remove_numbers': False}, 'talk soon'),
         ('f u c k dick', {}, 'f**k dick'),
         ('Hello! I\'m Reeses.', {}, 'hello im reeses'),
     ]
     for raw, options, expected in cases:
         self.assertEqual(normalise_text(raw, **options), expected)
Example #4
0
 def test_should_substitute_common_letter_encodings(self):
     # '5', '1', '!' and '0' stand in for letters in these inputs; the
     # expected values spell the words out. The last case passes
     # remove_numbers=False to normalise_text.
     cases = (
         ('5h1t', {}, 'shit'),
         ('b!tch', {}, 'bitch'),
         ('b00bs', {'remove_numbers': False}, 'boobs'),
     )
     for encoded, options, decoded in cases:
         self.assertEqual(normalise_text(encoded, **options), decoded)
Example #5
0
 def test_should_remove_explanation_marks(self):
     # Every variant below is expected to normalise to the same word,
     # whether the '!' is trailing, detached by a space, or inside the word.
     expected = 'bitch'
     variants = ('bitch!!!', 'b!tch', 'bitch !', 'bitch !!!', 'b!tch!!!')
     for raw in variants:
         self.assertEqual(normalise_text(raw), expected)
Example #6
0
 def test_should_remove_space_between_tweo_character_words(self):
     # The trailing two-character fragments are separated by double spaces;
     # the expectation has them joined into a single masked word.
     raw = 'This text contains one encoded bad word, fu  ck  er'
     expected = 'this text contains one encoded bad word f****r'
     self.assertEqual(normalise_text(raw), expected)
Example #7
0
 def test_should_remove_special_characters(self):
     # Runs of punctuation and symbols around "Special" are expected to
     # disappear, leaving lowercase words separated by single spaces.
     raw = 'This text contains @#$^&*(_)[]{}<>,./?~Special)*(&*^&$^*$&*&()) characters.'
     expected = 'this text contains special characters'
     self.assertEqual(normalise_text(raw), expected)
Example #8
0
 def test_should_lowercase(self):
     # Only letter case differs between input and expectation; the
     # spelling 'Conatins' appears in both and is left as written.
     raw = 'This Text Conatins Uppercase Characters'
     expected = 'this text conatins uppercase characters'
     self.assertEqual(normalise_text(raw), expected)