def testSanitize2(self):
    '''Test sanitize with argument set 2.

    Calls TextFX.sanitize with safechar='-', replace_diacritics=True,
    replace_umlauts=False, replace_greek=True and allowed_chars=r'\\'.
    Every sample is checked against the exact string expected back.
    '''
    cases = [
        (u'Äsbëst-Shop', 'Asbest-Shop'),
        (u'Süper Düper Plugin', 'Super-Duper-Plugin'),
        (u'__RiemannΖ', '--RiemannZeta'),
        # Four blanks: the other samples show one safe character per
        # replaced character (' !' -> '--'), so '----' needs four inputs.
        ('    ', '----'),
        # The backslash is expected to survive, the underscore is not.
        ('\\_', '\\-'),
        ('Andre\'sSSuperPLUGIN', 'Andre-sSSuperPLUGIN'),
        ('AndisSSuper_PluginSTOP !', 'AndisSSuper-PluginSTOP--'),
        (u'"Gauß"', '-Gau--'),
    ]
    for sample, wanted in cases:
        result = TextFX.sanitize(
            sample,
            safechar='-',
            replace_diacritics=True,
            replace_umlauts=False,
            replace_greek=True,
            allowed_chars=r'\\',
        )
        self.assertEqual(result, wanted)
def testSanitize1(self):
    '''Test sanitize with argument set 1.

    Calls TextFX.sanitize with an empty safechar, all three replace_*
    switches turned on and an empty allowed_chars, then compares each
    result with the expected string.
    '''
    options = dict(
        safechar='',
        replace_diacritics=True,
        replace_umlauts=True,
        replace_greek=True,
        allowed_chars='',
    )
    # Each entry pairs a raw sample with the string sanitize should return.
    cases = [
        (u'Äsbëst-Shop', 'AesbestShop'),
        (u'Süper Düper Plugin', 'SueperDueperPlugin'),
        (u'__RiemannΖ', 'RiemannZeta'),
        (' ', ''),
        ('\\_', ''),
        ('Andre\'sSSuperPLUGIN', 'AndresSSuperPLUGIN'),
        ('AndisSSuper_PluginSTOP !', 'AndisSSuperPluginSTOP'),
        (u'"Gauß"', 'Gauss'),
        (u'André', 'Andre'),
    ]
    for raw, wanted in cases:
        self.assertEqual(TextFX.sanitize(raw, **options), wanted)
def testSanitizeWithDifferentEncoding(self):
    '''Test sanitize with a different encoding than the default.

    Every sample is encoded to latin-1 before being handed to
    TextFX.sanitize, and the same encoding name is passed through the
    ``encoding`` keyword argument.
    '''
    encoding = 'latin-1'
    samples = [
        u'Äsbëst-Shop',
        u'Süper Düper Plugin',
        ' ',
        '\\_',
        'Andre\'sSSuperPLUGIN',
        'AndisSSuper_PluginSTOP !',
        u'"Gauß"',
        u'André',
    ]
    # Encode every sample the same way. The first sample used to go through
    # .decode("utf-8") first, which raises on a unicode literal containing
    # non-ASCII characters before sanitize is ever reached.
    data = [sample.encode(encoding) for sample in samples]
    expected = [
        'AesbestShop',
        'SueperDueperPlugin',
        '',
        '',
        'AndresSSuperPLUGIN',
        'AndisSSuperPluginSTOP',
        'Gauss',
        'Andre',
    ]
    for i, sample in enumerate(data):
        result = TextFX.sanitize(
            sample,
            safechar='',
            replace_diacritics=True,
            replace_umlauts=True,
            replace_greek=True,
            allowed_chars='',
            encoding=encoding,
        )
        self.assertEqual(result, expected[i])
def testSanitize(self):
    '''Test sanitize with default arguments.

    Only the sample is passed to TextFX.sanitize; each result is compared
    with the exact string expected back.
    '''
    cases = [
        ('Really', 'Really'),
        ('Super Duper Plugin', 'Super_Duper_Plugin'),
        ('RSS Generator', 'RSS_Generator'),
        # Four blanks: ' !' below becomes '__', i.e. one underscore per
        # replaced character, so '____' needs four characters of input.
        ('    ', '____'),
        ('_', '_'),
        ('AndreSSuperPLUGIN', 'AndreSSuperPLUGIN'),
        ('AndisSSuper_PluginSTOP !', 'AndisSSuper_PluginSTOP__'),
    ]
    for sample, wanted in cases:
        result = TextFX.sanitize(sample)
        self.assertEqual(result, wanted)