def test_nonascii_chars(self):
        """Check which non-ASCII characters are reported as unknown, before and after custom configuration."""
        passwordmetrics.configure()
        # In 8-bit strings, ö is a part of Latin-1
        _, unknown = passwordmetrics._character_entropy(b'abcdefgh\xf6')
        self.assertEqual(unknown, set())

        # But this is not!
        _, unknown = passwordmetrics._character_entropy('abcdefgh\N{LATIN CAPITAL LETTER H WITH STROKE}')
        self.assertEqual(unknown, set('\N{LATIN CAPITAL LETTER H WITH STROKE}'))

        # If you want these to work, you have to make a custom configuration.
        groups = {
            'lowercase': set(string.ascii_lowercase),
            'uppercase': set(string.ascii_uppercase),
            'digits': set(string.digits),
            'punctuation': set(string.punctuation),
            'whitespace': set(string.whitespace),
            # chr(i), not str(i): the group must hold the control characters themselves,
            # not the decimal text of their code points ('0', '11', '127', ...).
            'non-printable': set(chr(i) for i in range(128) if chr(i) not in string.printable),
            'extras': set('åäöö\N{LATIN CAPITAL LETTER H WITH STROKE}'),
        }

        # NOTE(review): this leaves the custom groups configured after the test;
        # confirm that other tests reconfigure passwordmetrics as needed.
        passwordmetrics.configure(groups=groups)
        # With the 'extras' group configured, no character should be reported as unknown.
        _, unknown = passwordmetrics._character_entropy('abcdefghö\N{LATIN CAPITAL LETTER H WITH STROKE}')
        self.assertEqual(unknown, set())
    def test_character_entropy(self):
        """Check the character-based entropy value for a handful of 8-character passwords."""
        # (password, expected entropy) pairs, checked in order.
        expectations = (
            ('abcdefgh', 37.6035177451287),
            ('12345678', 26.5754247590989),
            # Repeated characters count as one:
            ('aAaAaAaA', 11.4008794362821),
            # As good as it gets for an 8 char password
            ('xyFg98%!', 52.4367108134211),
        )
        for password, expected_entropy in expectations:
            # The second element (unknown characters) is not checked in this test.
            measured, _unknown = passwordmetrics._character_entropy(password)
            self.assertAlmostEqual(measured, expected_entropy)
    def test_substitutions(self):
        """Check how character substitutions affect the entropy score and word detection."""
        # This long password gets a reasonable score on characters alone
        self.assertAlmostEqual(passwordmetrics._character_entropy('Tr0ub4dor&3')[0], 65.54588851677637)
        # But much worse when considering it uses a word, even though it's misspelled and only
        # appears once in the whole corpus (because I put it there).
        self.assertAlmostEqual(passwordmetrics.metrics('Tr0ub4dor&3')['entropy'], 42.11714046349914)
        # Although still better than if there were no substitutions in the word
        self.assertAlmostEqual(passwordmetrics.metrics('Troubador&3')['entropy'], 31.332505617941614)

        # _find_words returns a (words, rest) pair; only the leftover `rest` string is checked here.
        # assertEqual is used because the assertEquals alias was removed in Python 3.12.
        _, rest = passwordmetrics._find_words('batterybattery', self.words)
        self.assertEqual(rest, '')

        _, rest = passwordmetrics._find_words('b4tteryb4ttery', self.words)
        self.assertEqual(rest, '4')

        _, rest = passwordmetrics._find_words('b4ttery8attery', self.words)
        self.assertEqual(rest, '48')