def _dictionary_word_key_generator(_database_path: Optional[str] = None) -> Iterator[str]:
    """ Yield, one by one, every word stored in every available dictionary.

    Languages are visited in the order given by
    *Dictionary.get_available_languages*; each language dictionary is kept
    open while its words are being yielded.

    :param _database_path: Absolute pathname to database file. Usually you don't
        set this parameter, but it is useful for tests.
    :return: An iterator through the words of all dictionaries.
    """
    for language in Dictionary.get_available_languages(_database_path):
        with Dictionary.open(language, False, _database_path) as language_dictionary:
            yield from language_dictionary.get_all_words()
def main(args=None, _database_path=None) -> None:
    """ Command line entry point: parse arguments and run the requested mode.

    Supported values for the parsed "mode" argument are "dictionary",
    "cipher", "decipher" and "attack". Any other mode (or an unknown
    dictionary action) is silently ignored.

    :param args: Command line arguments, without the program name. When
        omitted, *sys.argv[1:]* is read at call time.
    :param _database_path: Absolute pathname to database file. Usually you don't
        set this parameter, but it is useful for tests.
    """
    # Resolve the default when called, not when the module is imported.
    # A default of sys.argv[1:] in the signature would be frozen at definition
    # time and would miss any later change to sys.argv.
    if args is None:
        args = sys.argv[1:]
    arguments: Dict[str, str] = parse_arguments(args)
    mode = arguments["mode"]

    # DICTIONARY MANAGEMENT
    if mode == "dictionary":
        action = arguments["action"]
        if action == "create":
            initial_words_file = arguments.get("initial_words_file", None)
            with Dictionary.open(arguments["dictionary_name"], create=True,
                                 _database_path=_database_path) as dictionary:
                # The words file is optional when creating a dictionary.
                if initial_words_file is not None:
                    dictionary.populate(initial_words_file)
        elif action == "delete":
            Dictionary.remove_dictionary(arguments["dictionary_name"],
                                         _database_path=_database_path)
        elif action == "update":
            with Dictionary.open(arguments["dictionary_name"], create=False,
                                 _database_path=_database_path) as dictionary:
                dictionary.populate(arguments["words_file"])
        elif action == "list":
            dictionaries = Dictionary.get_available_languages(
                _database_path=_database_path)
            for dictionary_name in dictionaries:
                print(dictionary_name)
    # CIPHERING MANAGEMENT
    elif mode == "cipher":
        ciphered_content = _process_file_with_key(
            arguments["file_to_cipher"],
            Algorithm.from_string(arguments["algorithm"]),
            arguments["key"],
            MessageOperation.from_string(mode),
            arguments.get("charset"))
        _output_result(ciphered_content, arguments)
    # DECIPHERING MANAGEMENT
    elif mode == "decipher":
        deciphered_content = _process_file_with_key(
            arguments["file_to_decipher"],
            Algorithm.from_string(arguments["algorithm"]),
            arguments["key"],
            MessageOperation.from_string(mode),
            arguments.get("charset"))
        _output_result(deciphered_content, arguments)
    # ATTACK MANAGEMENT
    elif mode == "attack":
        recovered_content = _attack_file(
            arguments["file_to_attack"],
            Algorithm.from_string(arguments["algorithm"]),
            arguments.get("charset"),
            _database_path=_database_path)
        _output_result(recovered_content, arguments)
def hack_substitution_mp(ciphered_text: str, charset: str = DEFAULT_CHARSET,
                         _database_path: Optional[str] = None) -> (str, float):
    """ Recover the key of a substitution ciphered text using multiprocessing.

    A word pattern matching technique is used to identify the language of the
    original text.

    **You should use this function instead of *hack_substitution*.**

    *hack_substitution* works sequentially, whereas this function spreads the
    work over a pool of processes to improve performance.

    :param ciphered_text: Text to be deciphered.
    :param charset: Charset used for substitution method. Both ends, ciphering
        and deciphering, should use the same charset or original text won't be
        properly recovered.
    :param _database_path: Absolute pathname to database file. Usually you don't
        set this parameter, but it is useful for tests.
    :return: A tuple with substitution key found and success probability.
    """
    words = get_words_from_text(ciphered_text)
    languages = Dictionary.get_available_languages(_database_path=_database_path)
    # Maps every candidate charset key to its probability of being valid.
    keys_found: Dict[str, float] = dict()
    with multiprocessing.Pool(_get_usable_cpus()) as pool:
        mapping_jobs = ((language, words, charset, _database_path)
                        for language in languages)
        mappings_by_language: List[Tuple[List[Mapping], str]] = pool.starmap(
            _get_possible_mappings, mapping_jobs)
        # Every mapping is assessed on its own, wrapped in a one-element list,
        # instead of sending the whole mappings list in a single call: the
        # original author's tests found this to be faster.
        assessment_jobs = ((ciphered_text, language, [mapping], charset, _database_path)
                           for mappings, language in mappings_by_language
                           for mapping in mappings)
        assessed_keys: List[Dict[str, float]] = pool.starmap(
            _assess_candidate_keys, assessment_jobs)
        for language_keys in assessed_keys:
            # Unlikely as it is, the same key may show up more than once.
            # Discard the new entries that score lower than the known ones so
            # the highest probability is the one that stays.
            outranked = [key for key, probability in language_keys.items()
                         if key in keys_found and probability < keys_found[key]]
            for key in outranked:
                del language_keys[key]
            # What is left is either new or at least as good as what we had.
            keys_found.update(language_keys)
    best_key, best_probability = _get_best_key(keys_found)
    return best_key, best_probability
def frequency_key_generator(ciphered_text: str,
                            maximum_key_length: int = 5,
                            _database_path: Optional[str] = None) -> Iterator[str]:
    """ Statistically analyse a ciphertext and yield its most likely keys.

    All candidate keys are collected first, for every available language and
    every likely key length, and only then are they yielded.

    :param ciphered_text: Text to be deciphered.
    :param maximum_key_length: Give keys up to given maximum key length.
    :param _database_path: Absolute pathname to database file. Usually you don't
        set this parameter, but it is useful for tests.
    :return: An iterator through most likely keys below given length.
    """
    candidate_lengths = _get_likely_key_lengths(ciphered_text, maximum_key_length)
    candidate_keys: List[str] = []
    for language in Dictionary.get_available_languages(_database_path):
        with Dictionary.open(language, False, _database_path) as language_dictionary:
            for key_length in candidate_lengths:
                candidate_keys.extend(
                    _get_likely_keys(get_substrings(ciphered_text, key_length),
                                     language_dictionary))
    yield from candidate_keys
def hack_substitution(ciphered_text: str, charset: str = DEFAULT_CHARSET,
                      _database_path: Optional[str] = None) -> (str, float):
    """ Recover the key of a substitution ciphered text, one language at a time.

    A word pattern matching technique is used to identify the language of the
    original text.

    **You should not use this function. Use *hack_substitution_mp* instead.**

    This sequential version is slower than the multiprocessing one. It is kept
    only to allow comparisons between both approaches.

    :param ciphered_text: Text to be deciphered.
    :param charset: Charset used for substitution method. Both ends, ciphering
        and deciphering, should use the same charset or original text won't be
        properly recovered.
    :param _database_path: Absolute pathname to database file. Usually you don't
        set this parameter, but it is useful for tests.
    :return: A tuple with substitution key found and success probability.
    """
    words = get_words_from_text(ciphered_text)
    languages = Dictionary.get_available_languages(_database_path=_database_path)
    # Maps every candidate charset key to its probability of being valid.
    keys_found: Dict[str, float] = dict()
    for language in languages:
        mappings, _ = _get_possible_mappings(language, words, charset, _database_path)
        language_keys = _assess_candidate_keys(ciphered_text, language, mappings,
                                               charset, _database_path)
        # Unlikely as it is, two languages may produce the same key. Discard
        # the new entries that score lower than the known ones so the highest
        # probability is the one that stays.
        outranked = [key for key, probability in language_keys.items()
                     if key in keys_found and probability < keys_found[key]]
        for key in outranked:
            del language_keys[key]
        # What is left is either new or at least as good as what we had.
        keys_found.update(language_keys)
    best_key, best_probability = _get_best_key(keys_found)
    return best_key, best_probability
def test_get_dictionaries_names(loaded_dictionaries: LoadedDictionaries):
    """ Available languages must equal the languages of *loaded_dictionaries*. """
    database_path = loaded_dictionaries.temp_dir
    found_languages = Dictionary.get_available_languages(_database_path=database_path)
    assert found_languages == loaded_dictionaries.languages