Ejemplo n.º 1
0
def generate_wordlist(
        input_terms, length=8192, lowercase=True, use_kit=False,
        use_416=False, numbered=False, ascii_only=False,
        shuffle_max=True, dice_sides=DICE_SIDES):
    """Generate a diceware wordlist from dictionary list.

    `input_terms`: iterable over all strings to consider as wordlist item.

    `length`: desired length of wordlist to generate.

    `lowercase`: yield terms lowercase if set.

    `use_kit`: add terms from "dicewarekit", a wordlist with basic terms
               provided by Arnold G. Reinhold for self-baked diceware
               wordlists.

    `use_416`: add terms from another wordlist of Mr Reinhold,
               containing 416 terms.

    `numbered`: prefix each yielded term with its dice number (as
               computed by `idx_to_dicenums`) followed by a single space.
               The number of dice digits is the smallest count that can
               address `length` entries with `dice_sides`-sided dice.

    `ascii_only`: only accept words, that exclusively contain ASCII.

    `shuffle_max`: shuffle max width entries before cutting and sorting.
               This way a random set of max width entries gets included
               instead of the same fixed set at the beginning of all max width
               entries. I.e. not only those max width entries starting with
               ``a`` or ``b`` are included, but instead (randomly) also ``x``,
               ``y``, ``z`` might appear. By default we shuffle entries. Set
               to `False` to avoid this.

    `dice_sides`: number of sides of dice expected to be used with the
               result list. This is important only if the output list is
               numbered. Defaults to the module constant `DICE_SIDES`.

    Returns an iterator that yields at most `length` items. Double
    entries are removed; note that this de-duplication happens before
    lowercasing, so it does not merge terms that differ only in case.

    Raises `ValueError` if fewer than `length` unique terms are available,
    or if a numbered list is requested with `dice_sides` smaller than 2.
    As this is a generator, these errors surface when iteration starts,
    not when the function is called.
    """
    if ascii_only:
        input_terms = filter_chars(input_terms, allowed=DEFAULT_CHARS)
    # Base terms are consumed first, then the (possibly lazy) input terms.
    base_terms = base_terms_iterator(use_kit=use_kit, use_416=use_416)
    terms = sorted(set(base_terms).union(input_terms))
    if len(terms) < length:
        raise ValueError(
            "Wordlist too short: at least %s unique terms required." % length)
    dicenum = 0
    if length and numbered:
        if dice_sides < 2:
            raise ValueError(
                "Numbered wordlists require dice with at least 2 sides.")
        # Count the digits with exact integer arithmetic. The float form
        # ceil(log(length) / log(dice_sides)) can land slightly above an
        # exact integer when `length` is a power of `dice_sides` (e.g.
        # math.log(125) / math.log(5) is 3.0000000000000004), which would
        # add a superfluous digit to every number.
        capacity = 1
        while capacity < length:
            capacity *= dice_sides
            dicenum += 1
    prefix = ""
    for num, term in enumerate(sorted(min_width_iter(
            terms, length, shuffle_max))):
        if lowercase:
            term = term.lower()
        if numbered:
            prefix = idx_to_dicenums(num, dicenum, dice_sides) + " "
        yield "%s%s" % (prefix, term)