def testSanitize2(self):
    '''Sanitize with '-' as safe character, umlaut replacement off, backslash allowed.'''
    # Each case pairs a raw input with the text sanitize is expected to return.
    # NOTE(review): the ' ' -> '----' case suggests the input literal originally
    # held four whitespace characters that were collapsed -- confirm.
    cases = [
        (u'Äsbëst-Shop', 'Asbest-Shop'),
        (u'Süper Düper Plugin', 'Super-Duper-Plugin'),
        (u'__RiemannΖ', '--RiemannZeta'),
        (' ', '----'),
        ('\\_', '\\-'),
        ('Andre\'sSSuperPLUGIN', 'Andre-sSSuperPLUGIN'),
        ('AndisSSuper_PluginSTOP !', 'AndisSSuper-PluginSTOP--'),
        (u'"Gauß"', '-Gau--'),
    ]
    for raw, wanted in cases:
        actual = TextFX.sanitize(
            raw,
            safechar='-',
            replace_diacritics=True,
            replace_umlauts=False,
            replace_greek=True,
            allowed_chars=r'\\'
        )
        self.assertEqual(actual, wanted)
def testToCamelCaseUnicode(self):
    '''Check to_camelcase on inputs that contain non-ASCII letters.'''
    # Every spelling below is expected to yield the same camel-cased word.
    wanted = u'HötFlämingCäts'
    spellings = (
        u'höt fläming cäts',
        u'HötFlämingCäts',
        u'hötFlämingCäts',
        u'höt_fläming_cäts',
        u'Höt Fläming _ Cäts',
    )
    for spelling in spellings:
        self.assertEqual(TextFX.to_camelcase(spelling), wanted)

    # With capitalize=False the expected result starts with a lower-case letter.
    lowered = TextFX.to_camelcase(u'hôt_Flâmíng __ Cåts', capitalize=False)
    self.assertEqual(u'hôtFlâmíngCåts', lowered)
def testSanitizeWithDifferentEncoding(self):
    '''Test sanitize with a different encoding than the default.

    Every sample is encoded to latin-1 before it is handed to
    TextFX.sanitize, which receives the same encoding name through its
    ``encoding`` argument.
    '''
    encoding = 'latin-1'
    samples = [
        u'Äsbëst-Shop',
        u'Süper Düper Plugin',
        ' ',
        '\\_',
        'Andre\'sSSuperPLUGIN',
        'AndisSSuper_PluginSTOP !',
        u'"Gauß"',
        u'André',
    ]
    # Encode every sample the same way. Calling .decode("utf-8") on a text
    # literal first would fail: Python 3 str has no decode(), and Python 2
    # implicitly ASCII-encodes the non-ASCII characters before decoding.
    data = [sample.encode(encoding) for sample in samples]
    expected = [
        'AesbestShop',
        'SueperDueperPlugin',
        '',
        '',
        'AndresSSuperPLUGIN',
        'AndisSSuperPluginSTOP',
        'Gauss',
        'Andre',
    ]
    for raw, wanted in zip(data, expected):
        result = TextFX.sanitize(
            raw,
            safechar='',
            replace_diacritics=True,
            replace_umlauts=True,
            replace_greek=True,
            allowed_chars='',
            encoding=encoding
        )
        self.assertEqual(result, wanted)
def testSanitize1(self):
    '''Sanitize with an empty safe character and all replacements switched on.'''
    # (input, expected) pairs; no extra characters are allowed through.
    cases = [
        (u'Äsbëst-Shop', 'AesbestShop'),
        (u'Süper Düper Plugin', 'SueperDueperPlugin'),
        (u'__RiemannΖ', 'RiemannZeta'),
        (' ', ''),
        ('\\_', ''),
        ('Andre\'sSSuperPLUGIN', 'AndresSSuperPLUGIN'),
        ('AndisSSuper_PluginSTOP !', 'AndisSSuperPluginSTOP'),
        (u'"Gauß"', 'Gauss'),
        (u'André', 'Andre'),
    ]
    for raw, wanted in cases:
        actual = TextFX.sanitize(
            raw,
            safechar='',
            replace_diacritics=True,
            replace_umlauts=True,
            replace_greek=True,
            allowed_chars=''
        )
        self.assertEqual(actual, wanted)
def testToCamelCase(self):
    '''Check to_camelcase on several ASCII spellings of the same phrase.'''
    # All spellings are expected to collapse to this one camel-cased word.
    wanted = 'HotFlamingCats'
    spellings = (
        'hot flaming cats',
        'HotFlamingCats',
        'hotFlamingCats',
        'hot_flaming_cats',
        'Hot Flaming _ Cats',
        'Hot_Flaming __ Cats',
    )
    for spelling in spellings:
        self.assertEqual(TextFX.to_camelcase(spelling), wanted)
def testSplitCamelCase(self):
    '''Check the word lists returned by split_camelcase for sample names.'''
    names = (
        'My Plugin Extraordinaire!',
        'Super Duper Plugin',
        'RSS Generator',
        ' ',
        '_',
        'AndreSSuperPLUGIN',
        'AndisSSuper_PluginSTOP !',
    )
    # Expected word lists, in the same order as the names above.
    word_lists = (
        ['My', 'Plugin', 'Extraordinaire'],
        ['Super', 'Duper', 'Plugin'],
        ['RSS', 'Generator'],
        [],
        [],
        ['Andre', 'SSuper', 'PLUGIN'],
        ['Andis', 'SSuper', 'Plugin', 'STOP'],
    )
    for name, words in zip(names, word_lists):
        self.assertEqual(TextFX.split_camelcase(name), words)
def testAbbreviate(self):
    '''Check the abbreviations returned by abbreviate for sample names.'''
    # (name, expected abbreviation) pairs.
    cases = [
        ('My Plugin Extraordinaire!', 'MPE'),
        ('Super Duper Plugin', 'SDP'),
        ('RSS Generator', 'RG'),
        (' ', ''),
        ('_', ''),
        ('AndreSSuperPLUGIN', 'ASP'),
        ('AndisSSuper_PluginSTOP !', 'ASPS'),
    ]
    for name, wanted in cases:
        self.assertEqual(TextFX.abbreviate(name), wanted)
def testSanitize(self):
    '''Sanitize with no optional arguments supplied.'''
    # (input, expected) pairs for the default settings.
    # NOTE(review): the ' ' -> '____' case suggests the input literal originally
    # held four whitespace characters that were collapsed -- confirm.
    cases = [
        ('Really', 'Really'),
        ('Super Duper Plugin', 'Super_Duper_Plugin'),
        ('RSS Generator', 'RSS_Generator'),
        (' ', '____'),
        ('_', '_'),
        ('AndreSSuperPLUGIN', 'AndreSSuperPLUGIN'),
        ('AndisSSuper_PluginSTOP !', 'AndisSSuper_PluginSTOP__'),
    ]
    for raw, wanted in cases:
        self.assertEqual(TextFX.sanitize(raw), wanted)