-
Notifications
You must be signed in to change notification settings - Fork 0
/
andrei.py
executable file
·185 lines (158 loc) · 6.16 KB
/
andrei.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Markov's own password generator, approved by Shannon.
Passwords are generated based on a markovian model of a word corpus. If the
corpus contains prononceable words, generated words will probably be
prononceable too. The password will be sequence of words generated by the
model joined by a configurable separator. One special character is added after
each word. Words will be added until the target entropy is reached. Models are
stored in ~/.local/share/andrei.
Usage:
andrei --help
andrei [--clip] [--entropy N] [--model NAME] [--min-word-len X]
[--max-word-len Y] [--specials SPECIALS] [--sep SEP]
andrei modelize [--filter=REGEX] STATE_SIZE NAME FILE...
Options:
-h, --help show this help message
-c, --clip copy the password to the clipboard instead of printing it
-e N, --entropy=N minimum entropy for the password [default: 50]
-m NAME, --model=NAME model used for generation [default: latin_3]
--min-word-len=X discard words shorter than X [default: 5]
--max-word-len=Y discard words longer than Y [default: 10]
--specials=SPECIALS special characters to put after each word [default: 0123456789!@#$%^&*?+=]
--sep=SEP word separator [default: -]
--filter=REGEX regex used to filter words in corpus files [default: \\b(\w+)\\b]
Note:
Python's `os.urandom` is used as a secure randomness source. If it is
unavailable, a warning will be emitted and it will fall back to python's
default random generator (Mersenne Twister). In the later case you SHOULD NOT
use the generated password for security purpose. In any case, USE AT YOUR OWN
RISK. See https://docs.python.org/3.5/library/os.html#os.urandom for more
info.
"""
from bisect import bisect
from collections import namedtuple
from itertools import accumulate
from math import log, ceil
from os import path, makedirs
import pickle
import random
import re
import textwrap
from docopt import docopt
import logbook
#import pyperclip
_sysrand = random.SystemRandom()
try:
_sysrand.randrange(10)
except NotImplementedError:
logbook.warning('could not find a reliable randomness source: DO NOT USE '
'IT FOR SECURITY PURPOSE')
randrange = random.randrange
else:
randrange = _sysrand.randrange
def word_list(files, filter=r'\b([a-zA-Z][a-zA-Z]+)\b'):
pat = re.compile(filter)
words = []
for f in files:
with open(f) as s:
words.extend(pat.findall(s.read()))
return words #[w.lower() for w in words]
def count_transitions(words, n):
trans = {}
for w in words:
xs = '\x00'*n + w + '\x01'
for i in range(len(w)+1):
curr = xs[i:i+n]
succ = xs[i+n]
if curr not in trans:
trans[curr] = {succ: 1}
elif succ not in trans[curr]:
trans[curr][succ] = 1
else:
trans[curr][succ] += 1
return trans
Node = namedtuple('Node', ('choices', 'cumdist', 'entropy'))
def build_model(trans):
model = {}
for (state, succs) in trans.items():
tot = sum(succs.values())
ord = tuple(succs.items()) # get some fixed order
model[state] = Node(
choices=tuple(x[0] for x in ord),
cumdist=tuple(accumulate([x[1] for x in ord])),
entropy=-sum(f/tot * log(f/tot, 2) for f in succs.values()))
return model
class Generator:
def __init__(self, words=None, state_size=None, path=None):
if path is not None:
with open(path, 'rb') as s:
self.state_size, self.model = pickle.load(s)
else:
assert words is not None and state_size is not None, 'bad arguments'
self.state_size = state_size
trans = count_transitions(words, state_size)
self.model = build_model(trans)
def dump_model(self, path):
with open(path, 'wb') as s:
pickle.dump((self.state_size, self.model), s)
def generate(self):
state = '\x00' * self.state_size
out = []
entropy = 0
while True:
node = self.model[state]
entropy += node.entropy
r = randrange(node.cumdist[-1])
succ = node.choices[bisect(node.cumdist, r)]
if succ == '\x01':
return ''.join(out), entropy
out.append(succ)
state = state[1:] + succ
def generate_password(self, min_entropy, min_word_len=5, max_word_len=10,
specials='0123456789!@#$%^&*?+=~', sep='-'):
words = []
entropy = 0
n = len(specials)
sp_ent = log(n, 2) if n > 0 else 0
while entropy < min_entropy:
while True:
w, e = self.generate()
if min_word_len <= len(w) < max_word_len:
break
entropy += e
if n > 0:
s = specials[randrange(n)]
entropy += sp_ent
words.append(w + s)
else:
words.append(w)
return (sep.join(words), entropy)
def main():
logbook.StderrHandler(format_string='{record.level_name}: {record.message}').push_application()
args = docopt(__doc__)
BASE = path.expanduser(path.join('~', '.local', 'share', 'andrei'))
makedirs(BASE, exist_ok=True)
if args['modelize']:
words = word_list(args['FILE'], args['--filter'])
logbook.info(textwrap.shorten('Found {} words: {}'.format(
len(words),
', '.join(words[:50])), width=70, placeholder='...'))
gen = Generator(words, int(args['STATE_SIZE']))
gen.dump_model(path.join(BASE, args['NAME']))
print('Successfully generated model {}'.format(args['NAME']))
else:
gen = Generator(path=path.join(BASE, args['--model']))
pw, ent = gen.generate_password(
int(args['--entropy']),
int(args['--min-word-len']),
int(args['--max-word-len']),
args['--specials'],
args['--sep'])
logbook.info('entropy: {:.3f}'.format(ent))
if args['--clip']:
pyperclip.copy(pw)
else:
print(pw)
if __name__ == '__main__':
main()