def test_get_table(self):
    # Tables are built from category alphabets: the LATIN table must have an
    # entry for 's' but none for CIRILLIC_ES, and the HANGUL + COMMON table
    # must map u'ㅡ' to a non-empty collection.
    latin_table = Homoglyphs.get_table(Categories.get_alphabet(['LATIN']))
    self.assertIn('s', latin_table)
    self.assertNotIn(CIRILLIC_ES, latin_table)

    hangul_table = Homoglyphs.get_table(
        Categories.get_alphabet(['HANGUL', 'COMMON']))
    self.assertGreater(len(hangul_table[u'ㅡ']), 0)
def test_to_ascii(self):
    # to_ascii() with STRATEGY_LOAD must turn CIRILLIC_HE into ['x'].
    # The check is performed twice, each time on a freshly built instance.
    for _ in range(2):
        converted = Homoglyphs(strategy=STRATEGY_LOAD).to_ascii(CIRILLIC_HE)
        self.assertEqual(converted, ['x'])

    # With ascii_range widened to range(256), a second variant containing
    # u'×' is expected alongside the plain 'x' one.
    wide_range = Homoglyphs(strategy=STRATEGY_LOAD, ascii_range=range(256))
    self.assertEqual(
        wide_range.to_ascii(CIRILLIC_HE + u'23.'),
        ['x23.', u'×23.'],
    )
def test_get_char_variants(self):
    # _get_char_variants() is checked for 's' and 'c' with LATIN only, and
    # for 'c' again once CYRILLIC is enabled as well.
    def variants_of(categories, char):
        # A new Homoglyphs instance is built for every lookup.
        return Homoglyphs(categories)._get_char_variants(char)

    latin_s = variants_of(['LATIN'], 's')
    self.assertIn('s', latin_s)
    self.assertIn(u's', latin_s)
    self.assertNotIn('S', latin_s)
    self.assertNotIn(CIRILLIC_ES, latin_s)

    latin_c = variants_of(['LATIN'], 'c')
    self.assertIn('c', latin_c)
    self.assertNotIn(CIRILLIC_ES, latin_c)

    # Adding CYRILLIC makes CIRILLIC_ES an accepted variant of 'c'.
    mixed_c = variants_of(['LATIN', 'CYRILLIC'], 'c')
    self.assertIn('c', mixed_c)
    self.assertIn(CIRILLIC_ES, mixed_c)
def test_ascii_strategy(self):
    # Expected to_ascii() results for different ascii_strategy settings.
    mixed_categories = ('LATIN', 'COMMON', 'CYRILLIC')
    sample = u'xхч2'

    # STRATEGY_IGNORE: no ASCII rendering is expected for the sample.
    ignoring = Homoglyphs(
        categories=mixed_categories,
        ascii_strategy=STRATEGY_IGNORE,
    )
    self.assertEqual(ignoring.to_ascii(sample), [])

    # STRATEGY_REMOVE with ascii_range=range(256): four renderings expected.
    removing = Homoglyphs(
        categories=mixed_categories,
        ascii_strategy=STRATEGY_REMOVE,
        ascii_range=range(256),
    )
    self.assertEqual(
        removing.to_ascii(sample),
        ['xx2', u'x×2', u'×x2', u'××2'],
    )

    # STRATEGY_LOAD combined with STRATEGY_REMOVE: u'ч' yields nothing.
    loading = Homoglyphs(
        strategy=STRATEGY_LOAD,
        ascii_strategy=STRATEGY_REMOVE,
    )
    self.assertEqual(loading.to_ascii(u'ч'), [])
def test_strategy(self):
    # Expected _get_char_variants() results for each strategy when only
    # LATIN is enabled.

    # STRATEGY_IGNORE: u'ё' comes back as its own sole variant.
    ignoring = Homoglyphs(['LATIN'], strategy=STRATEGY_IGNORE)
    self.assertEqual(ignoring._get_char_variants(u'ё'), [u'ё'])

    # STRATEGY_LOAD: CIRILLIC_HE gains extra variants, including 'x'.
    loading = Homoglyphs(['LATIN'], strategy=STRATEGY_LOAD)
    loaded = loading._get_char_variants(CIRILLIC_HE)
    self.assertGreater(len(loaded), 1)
    self.assertIn('x', loaded)
    self.assertIn(CIRILLIC_HE, loaded)

    # STRATEGY_REMOVE: CIRILLIC_HE yields no variants at all.
    removing = Homoglyphs(['LATIN'], strategy=STRATEGY_REMOVE)
    self.assertEqual(removing._get_char_variants(CIRILLIC_HE), [])

    # 'd' must be among its own variants whichever strategy is used.
    for strategy in (STRATEGY_REMOVE, STRATEGY_IGNORE, STRATEGY_LOAD):
        found = Homoglyphs(
            ['LATIN'], strategy=strategy)._get_char_variants('d')
        self.assertIn('d', found)
def test_to_ascii(self):
    # to_ascii() with STRATEGY_LOAD must turn CIRILLIC_HE into ['x'].
    # The check is performed twice, each time on a freshly built instance.
    for _ in range(2):
        converted = Homoglyphs(strategy=STRATEGY_LOAD).to_ascii(CIRILLIC_HE)
        self.assertEqual(converted, ['x'])

    # Trailing u'23.' is carried through unchanged after the converted 'x'.
    loader = Homoglyphs(strategy=STRATEGY_LOAD)
    self.assertEqual(loader.to_ascii(CIRILLIC_HE + u'23.'), ['x23.'])

    mixed_categories = ('LATIN', 'COMMON', 'CYRILLIC')
    sample = u'xхч2'

    # STRATEGY_IGNORE: no ASCII rendering is expected for the sample.
    ignoring = Homoglyphs(
        categories=mixed_categories,
        ascii_strategy=STRATEGY_IGNORE,
    )
    self.assertEqual(ignoring.to_ascii(sample), [])

    # STRATEGY_REMOVE: a single rendering, 'xx2', is expected.
    removing = Homoglyphs(
        categories=mixed_categories,
        ascii_strategy=STRATEGY_REMOVE,
    )
    self.assertEqual(removing.to_ascii(sample), ['xx2'])
def test_to_ascii(self):
    # to_ascii() with STRATEGY_LOAD must turn CIRILLIC_HE into ['x'].
    # The check is performed twice, each time on a freshly built instance.
    for _ in range(2):
        converted = Homoglyphs(strategy=STRATEGY_LOAD).to_ascii(CIRILLIC_HE)
        self.assertEqual(converted, ['x'])

    # With ascii_range widened to range(256), a second variant containing
    # u'×' is expected alongside the plain 'x' one.
    wide_range = Homoglyphs(strategy=STRATEGY_LOAD, ascii_range=range(256))
    self.assertEqual(
        wide_range.to_ascii(CIRILLIC_HE + u'23.'),
        ['x23.', u'×23.'],
    )

    # Configured by language ('en') rather than by category.
    english = Homoglyphs(languages={'en'}, strategy=STRATEGY_LOAD)
    self.assertEqual(english.to_ascii('ТЕСТ'), ['TECT'])
def test_get_table(self):
    # The table built from the LATIN alphabet must have an entry for 's'
    # but none for CIRILLIC_ES.
    latin_table = Homoglyphs.get_table(Categories.get_alphabet(['LATIN']))
    self.assertIn('s', latin_table)
    self.assertNotIn(CIRILLIC_ES, latin_table)