def test_open_existing_dictionary(temp_dir):
    """An already created dictionary should open again without error."""
    # First create the not-yet-existing language...
    with Dictionary.open("english", create=True, _database_path=temp_dir):
        pass
    # ...then reopen the newly created language without the create flag.
    with Dictionary.open("english", _database_path=temp_dir) as dictionary:
        assert dictionary._already_created()
def test_create_language(temp_dir):
    """Test a new language creation at database."""
    dictionary = Dictionary("english", database_path=temp_dir)
    dictionary._open()
    # The language must not exist before creation and must exist afterwards.
    assert not dictionary._already_created()
    dictionary._create_dictionary()
    assert dictionary._already_created()
    dictionary._close()
def _dictionary_word_key_generator(
        _database_path: Optional[str] = None) -> Iterator[str]:
    """
    Iterate through every word in our dictionaries.

    :param _database_path: Absolute pathname to database file. Usually you
       don't set this parameter, but it is useful for tests.
    :return: An iterator over every word of every available language.
    """
    for language in Dictionary.get_available_languages(_database_path):
        with Dictionary.open(language, False, _database_path) as language_dictionary:
            # ``yield from`` replaces the manual re-yield loop and the
            # intermediate ``words`` variable.
            yield from language_dictionary.get_all_words()
def test_populate_database_histogram_from_text_file(temp_dir):
    """Populating from a book should yield the expected letter frequencies."""
    book = "cifra/tests/resources/english_book.txt"
    with Dictionary.open("english", create=True, _database_path=temp_dir) as dictionary:
        dictionary.populate(book)
    # Reference counts for the four most frequent letters in the test book.
    expected_frequencies = {"e": 35127, "t": 26406, "a": 24684, "o": 22983}
    with Dictionary.open("english", create=False, _database_path=temp_dir) as dictionary:
        for letter, frequency in expected_frequencies.items():
            assert dictionary.letter_histogram[letter] == frequency
def main(args=sys.argv[1:], _database_path=None) -> None:
    """Console entry point: parse arguments and dispatch to the requested mode.

    Modes are dictionary management, ciphering, deciphering and attacking.

    :param args: Command line arguments (defaults to sys.argv without the
       program name).
    :param _database_path: Absolute pathname to database file. Usually you
       don't set this parameter, but it is useful for tests.
    """
    # NOTE(review): the default ``sys.argv[1:]`` is captured at import time,
    # not at call time — fine for a CLI entry point, but confirm no caller
    # relies on a later-mutated sys.argv.
    arguments: Dict[str, str] = parse_arguments(args)
    # DICTIONARY MANAGEMENT
    if arguments["mode"] == "dictionary":
        if arguments["action"] == "create":
            # dict.get already returns None for a missing key.
            initial_words_file = arguments.get("initial_words_file")
            with Dictionary.open(arguments["dictionary_name"], create=True,
                                 _database_path=_database_path) as dictionary:
                if initial_words_file is not None:
                    dictionary.populate(initial_words_file)
        elif arguments["action"] == "delete":
            Dictionary.remove_dictionary(arguments["dictionary_name"],
                                         _database_path=_database_path)
        elif arguments["action"] == "update":
            with Dictionary.open(arguments["dictionary_name"], create=False,
                                 _database_path=_database_path) as dictionary:
                dictionary.populate(arguments["words_file"])
        elif arguments["action"] == "list":
            dictionaries = Dictionary.get_available_languages(
                _database_path=_database_path)
            for dictionary in dictionaries:
                print(dictionary)
    # CIPHERING MANAGEMENT
    elif arguments["mode"] == "cipher":
        ciphered_content = _process_file_with_key(
            arguments["file_to_cipher"],
            Algorithm.from_string(arguments["algorithm"]),
            arguments["key"],
            MessageOperation.from_string(arguments["mode"]),
            # get() replaces the verbose "in"-check-then-index pattern.
            arguments.get("charset"))
        _output_result(ciphered_content, arguments)
    # DECIPHERING MANAGEMENT
    elif arguments["mode"] == "decipher":
        deciphered_content = _process_file_with_key(
            arguments["file_to_decipher"],
            Algorithm.from_string(arguments["algorithm"]),
            arguments["key"],
            MessageOperation.from_string(arguments["mode"]),
            arguments.get("charset"))
        _output_result(deciphered_content, arguments)
    # ATTACK MANAGEMENT
    elif arguments["mode"] == "attack":
        recovered_content = _attack_file(
            arguments["file_to_attack"],
            Algorithm.from_string(arguments["algorithm"]),
            arguments.get("charset"),
            _database_path=_database_path)
        _output_result(recovered_content, arguments)
def test_populate_words_from_text_files(temporary_text_file):
    """Every word of the source text should end up stored at the dictionary."""
    text_file = temporary_text_file[0].name
    clean_text = temporary_text_file[1]
    language = temporary_text_file[2]
    temp_dir = temporary_text_file[3]
    expected_words = set(clean_text.lower().split())
    with Dictionary.open(language, create=True, _database_path=temp_dir) as dictionary:
        dictionary.populate(text_file)
    with Dictionary.open(language, _database_path=temp_dir) as dictionary:
        assert all(dictionary.word_exists(word) for word in expected_words)
def test_delete_language(loaded_dictionary_temp_dir):
    """Test delete a language also removes its words."""
    language = "german"
    Dictionary.remove_dictionary(language,
                                 _database_path=loaded_dictionary_temp_dir)
    # After removal, no word of that language should remain at database.
    removed_dictionary = Dictionary(language, loaded_dictionary_temp_dir)
    removed_dictionary._open()
    for word in MICRO_DICTIONARIES[language]:
        assert not removed_dictionary.word_exists(word, _testing=True)
    removed_dictionary._close()
def test_get_all_words(loaded_dictionary_temp_dir):
    """get_all_words should return exactly the stored english words."""
    with Dictionary.open(
            "english", False,
            _database_path=loaded_dictionary_temp_dir) as dictionary:
        assert set(dictionary.get_all_words()) == {"yes", "no", "dog", "cat", "snake"}
def loaded_dictionary_temp_dir(tmp_path):
    """Create a dictionary at a temp dir filled with only a handful of words.

    :return: Yields created temp_dir to host temporal dictionary database.
    """
    # Load test data.
    for language, words in MICRO_DICTIONARIES.items():
        with Dictionary.open(language, create=True, _database_path=tmp_path) as language_dictionary:
            # A plain loop replaces the list comprehension that was used only
            # for its side effects (it built and discarded a throwaway list).
            for word in words:
                language_dictionary.add_word(word)
    # Check all words are stored at database:
    for language, words in MICRO_DICTIONARIES.items():
        with Dictionary.open(language, _database_path=tmp_path) as language_dictionary:
            assert all(language_dictionary.word_exists(word) for word in words)
    yield tmp_path
def test_add_multiple_words(temp_dir):
    """Bulk-added words should all become present at the dictionary."""
    words = MICRO_DICTIONARIES["english"]
    with Dictionary.open("english", create=True, _database_path=temp_dir) as dictionary:
        # None of the words exist before the bulk insert...
        assert all(not dictionary.word_exists(word) for word in words)
        dictionary.add_multiple_words(words)
        # ...and all of them exist afterwards.
        assert all(dictionary.word_exists(word) for word in words)
def test_cwd_word(temp_dir):
    """Test if we can check for word existence, write a new word and finally
    delete it.
    """
    with Dictionary.open("english", create=True, _database_path=temp_dir) as dictionary:
        assert not dictionary.word_exists("test")
        dictionary.add_word("test")
        assert dictionary.word_exists("test")
        dictionary.remove_word("test")
        assert not dictionary.word_exists("test")
def test_store_word_pattern(temp_dir):
    """Test word pattern is properly stored at database."""
    word = "classification"
    pattern = "0.1.2.3.3.4.5.4.0.2.6.4.7.8"
    with Dictionary.open("test", create=True, _database_path=temp_dir) as dictionary:
        assert not dictionary.word_exists(word)
        dictionary.add_word(word)
        assert dictionary.word_exists(word)
        # The stored word must be retrievable through its letter pattern.
        assert word in dictionary.get_words_with_pattern(pattern)
def frequency_key_generator(
        ciphered_text: str,
        maximum_key_length: int = 5,
        _database_path: Optional[str] = None) -> Iterator[str]:
    """
    Assess statistically given ciphertext to return most likely keys.

    :param ciphered_text: Text to be deciphered.
    :param maximum_key_length: Give keys up to given maximum key length.
    :param _database_path: Absolute pathname to database file. Usually you
       don't set this parameter, but it is useful for tests.
    :return: An iterator through most likely keys below given length.
    """
    likely_key_lengths = _get_likely_key_lengths(ciphered_text, maximum_key_length)
    # Substrings depend only on the ciphertext and the key length, so compute
    # them once up front instead of recomputing them for every language.
    substrings_by_length = {key_length: get_substrings(ciphered_text, key_length)
                            for key_length in likely_key_lengths}
    keys_to_try: List[str] = []
    for language in Dictionary.get_available_languages(_database_path):
        with Dictionary.open(language, False, _database_path) as language_dictionary:
            for key_length in likely_key_lengths:
                likely_keys = _get_likely_keys(substrings_by_length[key_length],
                                               language_dictionary)
                keys_to_try.extend(likely_keys)
    yield from keys_to_try
def _get_word_mapping(charset: str, ciphered_word: str,
                      dictionary: Dictionary) -> Mapping:
    """
    Create a mapping with characters candidates for given ciphered word.

    :param charset: Charset used for substitution method. Both ends, ciphering
       and deciphering, should use the same charset or original text won't be
       properly recovered.
    :param ciphered_word: Ciphered word used to find words with similar
       patterns.
    :param dictionary: Dictionary to extract from words with the same pattern
       than ciphered word.
    :return: A Mapping class instance.
    """
    mapping = Mapping(charset)
    pattern = get_word_pattern(ciphered_word)
    candidates = dictionary.get_words_with_pattern(pattern)
    # For every position, record each candidate word's letter at that position
    # as a possible deciphering of the corresponding cipherletter.
    for position, cipherletter in enumerate(ciphered_word):
        for candidate in candidates:
            mapping[cipherletter].add(candidate[position])
    return mapping
def hack_substitution_mp(ciphered_text: str, charset: str = DEFAULT_CHARSET,
                         _database_path: Optional[str] = None) -> (str, float):
    """
    Get substitution ciphered text key.

    Uses a word pattern matching technique to identify used language.

    **You should use this function instead of *hack_substitution*.**

    Whereas *hack_substitution* uses a sequential approach, this function uses
    multiprocessing to improve performance.

    :param ciphered_text: Text to be deciphered.
    :param charset: Charset used for substitution method. Both ends, ciphering
       and deciphering, should use the same charset or original text won't be
       properly recovered.
    :param _database_path: Absolute pathname to database file. Usually you
       don't set this parameter, but it is useful for tests.
    :return: A tuple with substitution key found and success probability.
    """
    ciphered_words = get_words_from_text(ciphered_text)
    available_languages = Dictionary.get_available_languages(_database_path=_database_path)
    keys_found: Dict[str, float] = dict()  # Keys are charset keys and values valid probabilities.
    with multiprocessing.Pool(_get_usable_cpus()) as pool:
        # Stage 1: build candidate mappings for every language in parallel.
        nargs = ((language, ciphered_words, charset, _database_path)
                 for language in available_languages)
        possible_mappings: List[Tuple[List[Mapping], str]] = pool.starmap(_get_possible_mappings, nargs)
        # I could have passed the entire mappings list to _assess_candidates_keys() but
        # in my tests I've discovered to be more perfomant to extract every element from
        # mappings list and passing them as one element lists.
        nargs = ((ciphered_text, language, [mapping], charset, _database_path)
                 for mappings, language in possible_mappings
                 for mapping in mappings)
        # Stage 2: score every single-mapping candidate in parallel.
        language_keys_list: List[Dict[str, float]] = pool.starmap(_assess_candidate_keys, nargs)
        for language_keys in language_keys_list:
            # It would be extremely odd, but two languages may generate the same key.
            # So we must keep the one with higher probability.
            for key in keys_found:
                if key in language_keys:
                    if language_keys[key] < keys_found[key]:
                        language_keys.pop(key)
            # Now, languages_keys should have keys not yet present at keys_found or
            # with smaller probability.
            keys_found.update(language_keys)
    best_key, best_probability = _get_best_key(keys_found)
    return best_key, best_probability
def _generate_language_mapping(language: str, ciphered_words: Set[str],
                               charset: str = DEFAULT_CHARSET,
                               _database_path: Optional[str] = None) -> Mapping:
    """
    Generate a mapping with all letter candidates in given language for every
    cipherletter.

    :param language: Language to look letter candidates into.
    :param ciphered_words: Every cipherword in message.
    :param charset: Charset used for substitution. Both ends, ciphering and
       deciphering, should use the same charset or original text won't be
       properly recovered.
    :param _database_path: Absolute pathname to database file. Usually you
       don't set this parameter, but it is useful for tests.
    :return: Mapping loaded with all candidates in given language.
    """
    mapping = Mapping(charset)
    with Dictionary.open(language, False, _database_path=_database_path) as dictionary:
        # Narrow the global mapping with the candidates of every cipherword.
        for cipherword in ciphered_words:
            mapping.reduce_mapping(_get_word_mapping(charset, cipherword, dictionary))
    return mapping
def loaded_dictionaries() -> LoadedDictionaries:
    """Create a dictionaries database at a temp dir filled with four languages.

    Languages in database are: english, spanish, french and german.

    :return: Yields a LoadedDictionary fill info of temporal dictionaries database.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        # Copy the language books into the temp dir before populating.
        resources_path = os.path.join(temp_dir, "resources")
        os.mkdir(resources_path)
        books = [f"cifra/tests/resources/{language}_book.txt" for language in LANGUAGES]
        copy_files(books, resources_path)
        for language in LANGUAGES:
            language_book = os.path.join(temp_dir, f"resources/{language}_book.txt")
            with Dictionary.open(language=language, create=True,
                                 _database_path=temp_dir) as dictionary:
                dictionary.populate(language_book)
        yield LoadedDictionaries(temp_dir=temp_dir, languages=LANGUAGES)
def hack_substitution(ciphered_text: str, charset: str = DEFAULT_CHARSET,
                      _database_path: Optional[str] = None) -> (str, float):
    """
    Get substitution ciphered text key.

    Uses a word pattern matching technique to identify used language.

    **You should not use this function. Use *hack_substitution_mp* instead.**
    This function is slower than *mp* one because is sequential while the
    other uses a multiprocessing approach. This function only stay here to
    allow comparisons between sequential and multiprocessing approaches.

    :param ciphered_text: Text to be deciphered.
    :param charset: Charset used for substitution method. Both ends, ciphering
       and deciphering, should use the same charset or original text won't be
       properly recovered.
    :param _database_path: Absolute pathname to database file. Usually you
       don't set this parameter, but it is useful for tests.
    :return: A tuple with substitution key found and success probability.
    """
    ciphered_words = get_words_from_text(ciphered_text)
    available_languages = Dictionary.get_available_languages(_database_path=_database_path)
    keys_found: Dict[str, float] = dict()  # Keys are charset keys and values valid probabilities.
    for language in available_languages:
        possible_mappings, _ = _get_possible_mappings(language, ciphered_words,
                                                      charset, _database_path)
        language_keys = _assess_candidate_keys(ciphered_text, language,
                                               possible_mappings, charset,
                                               _database_path)
        # It would be extremely odd, but two languages may generate the same key.
        # So we must keep the one with higher probability.
        for key in keys_found:
            if key in language_keys:
                if language_keys[key] < keys_found[key]:
                    language_keys.pop(key)
        # Now, languages_keys should have keys not yet present at keys_found or
        # with smaller probability.
        keys_found.update(language_keys)
    best_key, best_probability = _get_best_key(keys_found)
    return best_key, best_probability
def test_get_dictionaries_names(loaded_dictionaries: LoadedDictionaries):
    """Available language names should match those loaded by the fixture."""
    names = Dictionary.get_available_languages(
        _database_path=loaded_dictionaries.temp_dir)
    assert names == loaded_dictionaries.languages
def test_open_not_existing_dictionary(temp_dir):
    """Opening a never-created language must raise NotExistingLanguage."""
    with pytest.raises(NotExistingLanguage), \
            Dictionary.open("english", _database_path=temp_dir):
        pass