def test_open_existing_dictionary(temp_dir):
    """Check that a dictionary created in one session can be opened again."""
    language = "english"
    # First session: the language does not exist yet, so ask for its creation.
    with Dictionary.open(language, create=True, _database_path=temp_dir):
        pass
    # Second session: open it without the create flag and verify it is there.
    with Dictionary.open(language, _database_path=temp_dir) as reopened:
        assert reopened._already_created()
def test_create_language(temp_dir):
    """Test a new language creation at database.

    The dictionary is driven through its private _open()/_close() pair
    instead of Dictionary.open() so the state before and after
    _create_dictionary() can be observed.
    """
    english_dictionary = Dictionary("english", database_path=temp_dir)
    english_dictionary._open()
    try:
        assert not english_dictionary._already_created()
        english_dictionary._create_dictionary()
        assert english_dictionary._already_created()
    finally:
        # Always pair _open() with _close(), even when an assertion fails,
        # so a failing test does not leave the dictionary open.
        english_dictionary._close()
Beispiel #3
0
def _dictionary_word_key_generator(
        _database_path: Optional[str] = None) -> Iterator[str]:
    """ Yield, language by language, every word stored in our dictionaries.

    :param _database_path: Forwarded to Dictionary to locate the database.
    :return: An iterator over the words of every available language.
    """
    for language_name in Dictionary.get_available_languages(_database_path):
        with Dictionary.open(language_name, False,
                             _database_path) as opened_dictionary:
            yield from opened_dictionary.get_all_words()
def test_populate_database_histogram_from_text_file(temp_dir):
    """Check letter histogram values after populating from the english book."""
    expected_letter_counts = {
        "e": 35127,
        "t": 26406,
        "a": 24684,
        "o": 22983,
    }
    # Populate in one session...
    with Dictionary.open("english", create=True,
                         _database_path=temp_dir) as new_dictionary:
        new_dictionary.populate("cifra/tests/resources/english_book.txt")
    # ...and read the histogram back in a fresh one.
    with Dictionary.open("english", create=False,
                         _database_path=temp_dir) as stored_dictionary:
        for letter, expected_count in expected_letter_counts.items():
            assert stored_dictionary.letter_histogram[letter] == expected_count
Beispiel #5
0
def main(args=sys.argv[1:], _database_path=None) -> None:
    """Parse command line arguments and run the requested operation.

    Supported modes are "dictionary" (with actions "create", "delete",
    "update" and "list"), "cipher", "decipher" and "attack".

    :param args: Command line arguments handed to parse_arguments().
    :param _database_path: Forwarded to dictionary operations and to the
        attack routine.
    """
    arguments: Dict[str, str] = parse_arguments(args)
    mode = arguments["mode"]
    # Argument holding the input file for each keyed operation.
    keyed_operation_files = {
        "cipher": "file_to_cipher",
        "decipher": "file_to_decipher",
    }

    # DICTIONARY MANAGEMENT
    if mode == "dictionary":
        action = arguments["action"]
        if action == "create":
            words_source = arguments.get("initial_words_file", None)
            with Dictionary.open(arguments["dictionary_name"],
                                 create=True,
                                 _database_path=_database_path) as new_dictionary:
                if words_source is not None:
                    new_dictionary.populate(words_source)
        elif action == "delete":
            Dictionary.remove_dictionary(arguments["dictionary_name"],
                                         _database_path=_database_path)
        elif action == "update":
            with Dictionary.open(arguments["dictionary_name"],
                                 create=False,
                                 _database_path=_database_path) as existing_dictionary:
                existing_dictionary.populate(arguments["words_file"])
        elif action == "list":
            for language_name in Dictionary.get_available_languages(
                    _database_path=_database_path):
                print(language_name)

    # CIPHERING AND DECIPHERING MANAGEMENT
    elif mode in keyed_operation_files:
        processed_content = _process_file_with_key(
            arguments[keyed_operation_files[mode]],
            Algorithm.from_string(arguments["algorithm"]),
            arguments["key"],
            MessageOperation.from_string(mode),
            arguments.get("charset"))
        _output_result(processed_content, arguments)

    # ATTACK MANAGEMENT
    elif mode == "attack":
        recovered_content = _attack_file(
            arguments["file_to_attack"],
            Algorithm.from_string(arguments["algorithm"]),
            arguments.get("charset"),
            _database_path=_database_path)
        _output_result(recovered_content, arguments)
def test_populate_words_from_text_files(temporary_text_file):
    """Check every word of a text file is found after populating from it."""
    source_pathname = temporary_text_file[0].name
    clean_text = temporary_text_file[1]
    language = temporary_text_file[2]
    database_dir = temporary_text_file[3]
    expected_words = set(clean_text.lower().split())
    # Load the file in one session...
    with Dictionary.open(language,
                         create=True,
                         _database_path=database_dir) as new_dictionary:
        new_dictionary.populate(source_pathname)
    # ...and look the words up in a fresh one.
    with Dictionary.open(language,
                         _database_path=database_dir) as stored_dictionary:
        assert all(stored_dictionary.word_exists(expected_word)
                   for expected_word in expected_words)
def test_delete_language(loaded_dictionary_temp_dir):
    """Test delete a language also removes its words."""
    language_to_remove = "german"
    Dictionary.remove_dictionary(language_to_remove,
                                 _database_path=loaded_dictionary_temp_dir)
    # Check all words from removed language have been removed too.
    not_existing_dictionary = Dictionary(language_to_remove,
                                         loaded_dictionary_temp_dir)
    not_existing_dictionary._open()
    try:
        assert all(not not_existing_dictionary.word_exists(word, _testing=True)
                   for word in MICRO_DICTIONARIES[language_to_remove])
    finally:
        # Always pair _open() with _close(), even when the assertion fails,
        # so a failing test does not leave the dictionary open.
        not_existing_dictionary._close()
def test_get_all_words(loaded_dictionary_temp_dir):
    """Check get_all_words() returns exactly the words loaded for english."""
    expected_words = {"yes", "no", "dog", "cat", "snake"}
    with Dictionary.open(
            "english", False,
            _database_path=loaded_dictionary_temp_dir) as english_dictionary:
        stored_words = english_dictionary.get_all_words()
    # Order is irrelevant, so compare as sets.
    assert set(stored_words) == expected_words
def loaded_dictionary_temp_dir(tmp_path):
    """Fill a dictionary database at tmp_path with the MICRO_DICTIONARIES words.

    :return: Yields tmp_path, which hosts the temporal dictionary database.
    """
    # Store every word of every test language.
    for language_name, language_words in MICRO_DICTIONARIES.items():
        with Dictionary.open(language_name, create=True,
                             _database_path=tmp_path) as new_dictionary:
            for language_word in language_words:
                new_dictionary.add_word(language_word)
    # Reopen each language to verify its words really are at database.
    for language_name, language_words in MICRO_DICTIONARIES.items():
        with Dictionary.open(language_name,
                             _database_path=tmp_path) as stored_dictionary:
            assert all(stored_dictionary.word_exists(language_word)
                       for language_word in language_words)
    yield tmp_path
def test_add_multiple_words(temp_dir):
    """Check add_multiple_words() stores every word it is given."""
    language = "english"
    words_to_add = MICRO_DICTIONARIES[language]
    with Dictionary.open(language, create=True,
                         _database_path=temp_dir) as new_dictionary:
        # A freshly created dictionary should know none of the words.
        assert all(not new_dictionary.word_exists(candidate)
                   for candidate in words_to_add)
        new_dictionary.add_multiple_words(words_to_add)
        # After the bulk insert every one of them should be found.
        assert all(new_dictionary.word_exists(candidate)
                   for candidate in words_to_add)
def test_cwd_word(temp_dir):
    """Test the check / write / delete cycle for a single word."""
    sample_word = "test"
    with Dictionary.open("english", create=True,
                         _database_path=temp_dir) as new_dictionary:
        # Check: not there before adding it.
        assert not new_dictionary.word_exists(sample_word)
        # Write: found once added.
        new_dictionary.add_word(sample_word)
        assert new_dictionary.word_exists(sample_word)
        # Delete: gone again once removed.
        new_dictionary.remove_word(sample_word)
        assert not new_dictionary.word_exists(sample_word)
def test_store_word_pattern(temp_dir):
    """Test an added word can be retrieved through its word pattern."""
    sample_word = "classification"
    sample_pattern = "0.1.2.3.3.4.5.4.0.2.6.4.7.8"
    with Dictionary.open("test", create=True,
                         _database_path=temp_dir) as new_dictionary:
        assert not new_dictionary.word_exists(sample_word)
        new_dictionary.add_word(sample_word)
        assert new_dictionary.word_exists(sample_word)
        # The word should be listed among those sharing its pattern.
        matching_words = new_dictionary.get_words_with_pattern(sample_pattern)
        assert sample_word in matching_words
Beispiel #13
0
def frequency_key_generator(
        ciphered_text: str,
        maximum_key_length: int = 5,
        _database_path: Optional[str] = None) -> Iterator[str]:
    """ Assess statistically given ciphertext to return most likely keys.

    Candidate keys are collected for every available language and every
    likely key length before the first one is yielded.

    :param ciphered_text: Text to be deciphered.
    :param maximum_key_length: Give keys up to given maximum key length.
    :param _database_path: Absolute pathname to database file. Usually you don't
        set this parameter, but it is useful for tests.
    :return: An iterator through most likely keys below given length.
    """
    candidate_lengths = _get_likely_key_lengths(ciphered_text,
                                                maximum_key_length)
    candidate_keys: List[str] = []
    for language_name in Dictionary.get_available_languages(_database_path):
        with Dictionary.open(language_name, False,
                             _database_path) as opened_dictionary:
            for candidate_length in candidate_lengths:
                text_slices = get_substrings(ciphered_text, candidate_length)
                candidate_keys.extend(
                    _get_likely_keys(text_slices, opened_dictionary))
    yield from candidate_keys
Beispiel #14
0
def _get_word_mapping(charset: str, ciphered_word: str, dictionary: Dictionary) -> Mapping:
    """ Build a mapping of candidate characters for a single ciphered word.

    Every dictionary word sharing the ciphered word's pattern contributes, at
    each position, its character as a candidate for the cipherletter found at
    that same position.

    :param charset: Charset used for substitution method. Both ends, ciphering
     and deciphering, should use the same charset or original text won't be properly
     recovered.
    :param ciphered_word: Ciphered word used to find words with similar patterns.
    :param dictionary: Dictionary to extract from words with the same pattern than ciphered word.
    :return: A Mapping class instance.
    """
    candidates_mapping = Mapping(charset)
    pattern = get_word_pattern(ciphered_word)
    same_pattern_words = dictionary.get_words_with_pattern(pattern)
    for position, cipherletter in enumerate(ciphered_word):
        for plain_word in same_pattern_words:
            candidates_mapping[cipherletter].add(plain_word[position])
    return candidates_mapping
Beispiel #15
0
def hack_substitution_mp(ciphered_text: str, charset: str = DEFAULT_CHARSET,
                         _database_path: Optional[str] = None) -> (str, float):
    """ Get substitution ciphered text key.

    Uses a word pattern matching technique to identify used language.

    **You should use this function instead of *hack_substitution*.**

    Whereas *hack_substitution* uses a sequential approach, this function uses
    multiprocessing to improve performance.

    :param ciphered_text: Text to be deciphered.
    :param charset: Charset used for substitution method. Both ends, ciphering
     and deciphering, should use the same charset or original text won't be properly
     recovered.
    :param _database_path: Absolute pathname to database file. Usually you don't
     set this parameter, but it is useful for tests.
    :return: A tuple with substitution key found and success probability.
    """
    ciphered_words = get_words_from_text(ciphered_text)
    available_languages = Dictionary.get_available_languages(_database_path=_database_path)
    # Maps every candidate key to the best probability seen for it so far.
    keys_found: Dict[str, float] = dict()
    with multiprocessing.Pool(_get_usable_cpus()) as pool:
        # First stage: one job per language to get its possible mappings.
        mapping_jobs = ((language, ciphered_words, charset, _database_path)
                        for language in available_languages)
        possible_mappings: List[Tuple[List[Mapping], str]] = pool.starmap(
            _get_possible_mappings, mapping_jobs)
        # Second stage: one job per single mapping. The whole mappings list
        # could be handed over in one call, but the original author measured
        # one-element lists to perform better.
        assessment_jobs = (
            (ciphered_text, language, [mapping], charset, _database_path)
            for mappings, language in possible_mappings
            for mapping in mappings)
        language_keys_list: List[Dict[str, float]] = pool.starmap(
            _assess_candidate_keys, assessment_jobs)
        for language_keys in language_keys_list:
            # It would be extremely odd, but two jobs may generate the same
            # key. A key already found is only overwritten when the new
            # probability is not lower than the stored one.
            for key, probability in language_keys.items():
                if key not in keys_found or not probability < keys_found[key]:
                    keys_found[key] = probability
    best_key, best_probability = _get_best_key(keys_found)
    return best_key, best_probability
Beispiel #16
0
def _generate_language_mapping(language: str, ciphered_words: Set[str],
                               charset: str = DEFAULT_CHARSET,
                               _database_path: Optional[str] = None) -> Mapping:
    """ Build the mapping of letter candidates a language offers for every
    cipherletter.

    A word mapping is computed for each ciphered word and the language mapping
    is reduced with it.

    :param language: Language to look letter candidates into.
    :param ciphered_words: Every cipherword in message.
    :param charset: Charset used for substitution. Both ends, ciphering
        and deciphering, should use the same charset or original text won't be properly
        recovered.
    :param _database_path: Absolute pathname to database file. Usually you don't
        set this parameter, but it is useful for tests.
    :return: Mapping loaded with all candidates in given language.
    """
    combined_mapping = Mapping(charset)
    with Dictionary.open(language, False, _database_path=_database_path) as opened_dictionary:
        for cipherword in ciphered_words:
            combined_mapping.reduce_mapping(
                _get_word_mapping(charset, cipherword, opened_dictionary))
    return combined_mapping
def loaded_dictionaries() -> LoadedDictionaries:
    """Create a dictionaries database at a temp dir, one dictionary per language.

    Each language in LANGUAGES gets a dictionary populated from its book
    file, previously copied into a "resources" folder inside the temp dir.

    :return: Yields a LoadedDictionaries with the temp dir and the languages.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        # Copy the source books next to the database.
        resources_path = os.path.join(temp_dir, "resources")
        os.mkdir(resources_path)
        source_books = [f"cifra/tests/resources/{language}_book.txt"
                        for language in LANGUAGES]
        copy_files(source_books, resources_path)
        # Create and populate one dictionary per language.
        for language in LANGUAGES:
            with Dictionary.open(language=language,
                                 create=True,
                                 _database_path=temp_dir) as new_dictionary:
                new_dictionary.populate(
                    os.path.join(temp_dir, f"resources/{language}_book.txt"))
        yield LoadedDictionaries(temp_dir=temp_dir, languages=LANGUAGES)
Beispiel #18
0
def hack_substitution(ciphered_text: str, charset: str = DEFAULT_CHARSET,
                      _database_path: Optional[str] = None) -> (str, float):
    """ Get substitution ciphered text key.

    Uses a word pattern matching technique to identify used language.

    **You should not use this function. Use *hack_substitution_mp* instead.** This
    function is slower than *mp* one because is sequential while the other uses a
    multiprocessing approach. This function only stay here to allow comparisons
    between sequential and multiprocessing approaches.

    :param ciphered_text: Text to be deciphered.
    :param charset: Charset used for substitution method. Both ends, ciphering
     and deciphering, should use the same charset or original text won't be properly
     recovered.
    :param _database_path: Absolute pathname to database file. Usually you don't
     set this parameter, but it is useful for tests.
    :return: A tuple with substitution key found and success probability.
    """
    ciphered_words = get_words_from_text(ciphered_text)
    # Maps every candidate key to the best probability seen for it so far.
    keys_found: Dict[str, float] = dict()
    for language in Dictionary.get_available_languages(_database_path=_database_path):
        possible_mappings, _ = _get_possible_mappings(language, ciphered_words, charset, _database_path)
        language_keys = _assess_candidate_keys(ciphered_text, language, possible_mappings, charset, _database_path)
        # It would be extremely odd, but two languages may generate the same
        # key. A key already found is only overwritten when the new
        # probability is not lower than the stored one.
        for key, probability in language_keys.items():
            if key not in keys_found or not probability < keys_found[key]:
                keys_found[key] = probability
    best_key, best_probability = _get_best_key(keys_found)
    return best_key, best_probability
def test_get_dictionaries_names(loaded_dictionaries: LoadedDictionaries):
    """Check available languages match the ones loaded by the fixture."""
    database_dir = loaded_dictionaries.temp_dir
    found_languages = Dictionary.get_available_languages(
        _database_path=database_dir)
    assert found_languages == loaded_dictionaries.languages
def test_open_not_existing_dictionary(temp_dir):
    """Check opening a missing language without create raises NotExistingLanguage."""
    with pytest.raises(NotExistingLanguage), \
            Dictionary.open("english", _database_path=temp_dir):
        pass