def test_unicode_normalisation(self):
    """Text decorated with combining marks should normalise to plain words.

    Each pair is (decorated input, expected output). The second pair also
    expects the '%' in 'f%ckbuddy' to come back as 'u'.
    """
    decorated_pairs = (
        ('yourseͩlf', 'yourself'),
        ('l̘oo͓kͩi̒ng f́o͍r a f%ckbuddy', 'looking for a fuckbuddy'),
        ('ẖaͣv̷e hot sxx', 'have hot sxx'),
    )
    # Plain loop (no subTest) so the first mismatch still aborts the test.
    for decorated, plain in decorated_pairs:
        self.assertEqual(normalise_text(decorated), plain)
def test_should_not_substitute_numbers(self):
    """The dot and the trailing digits of 'si.ngles542' are expected to vanish.

    The expected result is 'singles': the digits are not turned into letters.
    """
    noisy = 'si.ngles542'
    cleaned = normalise_text(noisy)
    self.assertEqual(cleaned, 'singles')
def test_random_examples(self):
    """Run a mixed bag of sample inputs through normalise_text.

    Each entry is (input text, expected output, keyword options). The options
    dict is empty unless the case needs ``remove_numbers=False``.
    """
    default = {}
    keep_digits = {'remove_numbers': False}
    samples = (
        ('f u c k e r', 'f****r', default),
        ('*f**k*', 'f**k', default),
        ('sun-of-a-bitch!', 'sunofabitch', default),
        ('sfucks', 'sfucks', default),
        ('f*ck', 'fck', default),
        ('d*ck', 'dck', default),
        ('c*ck', 'cck', default),
        ('s3̒x', 'sex', default),
        ('sooَn', 'soon', default),
        ('Un̟beliٝev֮able anal puñisheͤr', 'unbelievable anal punisher', default),
        ('i\'m loּoki֒ng to f@ck righ̖t now', 'im looking to fck right now', default),
        (
            'are yͮou avͯai͟lable? se͆nd m͙e a quiͩck msg',
            'are you available send me a quick msg',
            default,
        ),
        ('A̎lrite my bab̅y', 'alrite my baby', default),
        ('seͤnd me a f%ckfr̎iend', 'send me a fuckfriend', default),
        ('s̻o wet right n֥ow', 'so wet right now', default),
        ('T͢A֚LK Sͯ00N', 'talk soon', keep_digits),
        ('f u c k dick', 'f**k dick', default),
        ('Hello! I\'m Reeses.', 'hello im reeses', default),
    )
    # Order matters: the first failing sample aborts the test, as before.
    for text, expected, options in samples:
        self.assertEqual(normalise_text(text, **options), expected)
def test_should_substitute_common_letter_encodings(self):
    """Look-alike characters standing in for letters should be decoded.

    Covers '5' -> 's', '1' -> 'i', '!' -> 'i' and, with
    ``remove_numbers=False``, '0' -> 'o'.
    """
    encoded_cases = (
        ('5h1t', {}, 'shit'),
        ('b!tch', {}, 'bitch'),
        ('b00bs', {'remove_numbers': False}, 'boobs'),
    )
    for encoded, options, decoded in encoded_cases:
        self.assertEqual(normalise_text(encoded, **options), decoded)
def test_should_remove_explanation_marks(self):
    """Exclamation marks, attached or space-separated, should not survive.

    Every variant is expected to collapse to 'bitch'. An embedded '!' (as in
    'b!tch') is expected to come back as 'i' rather than be dropped.
    """
    expected = 'bitch'
    variants = ('bitch!!!', 'b!tch', 'bitch !', 'bitch !!!', 'b!tch!!!')
    for variant in variants:
        self.assertEqual(normalise_text(variant), expected)
def test_should_remove_space_between_tweo_character_words(self):
    """A word split into short space-separated pieces should be rejoined.

    The trailing 'fu ck er' is expected to come back as the masked 'f****r',
    with the sentence lowercased and its comma removed.
    """
    sentence = 'This text contains one encoded bad word, fu ck er'
    wanted = 'this text contains one encoded bad word f****r'
    self.assertEqual(normalise_text(sentence), wanted)
def test_should_remove_special_characters(self):
    """Runs of punctuation and symbols should be stripped from the text.

    Only the lowercased words are expected to remain, separated by single
    spaces.
    """
    cluttered = 'This text contains @#$^&*(_)[]{}<>,./?~Special)*(&*^&$^*$&*&()) characters.'
    wanted = 'this text contains special characters'
    self.assertEqual(normalise_text(cluttered), wanted)
def test_should_lowercase(self):
    """Mixed-case input should come back entirely in lower case."""
    mixed_case = 'This Text Conatins Uppercase Characters'
    lowered = 'this text conatins uppercase characters'
    self.assertEqual(normalise_text(mixed_case), lowered)