# --- Train the English reference matrices ---------------------------------
# Learn trigram and monogram statistics of English from the training corpus.
base_matrix = TrigramSpace(alphabet)
english_monograms = MonogramMatrix()
with open(training_text_file_location, 'r', -1, 'utf-8', 'replace') as training_text:
    text = training_text.read()
normalized_text = utils.normalize(text)
base_matrix.learn(normalized_text)
english_monograms.learn(normalized_text)

# --- Generate the initial guess key ---------------------------------------
# Seed a substitution-key guess from character frequencies of the ciphertext,
# forcing space to be treated as the most common symbol.
key = SubstitutionKey("Guess key")
english_monograms.setCharacterAsMostCommon(' ')
guess = Guess(english_monograms.getListOfUniqueCharacters())
guess.randomGuessOneCharacter()
guess_mapping = english_monograms.generateMappingBasedOnFrequencies(file_text)
guess.setGuess(guess_mapping)
key.set(guess.get())

# When the true key is available, report how close the frequency guess is.
if actual_key:
    pprint(actual_key)
    pprint(guess_mapping)
    print("score: " + str(utils.compare_keys(actual_key, guess_mapping)))

# Decrypt with the guessed key and learn the trigram space of the result.
current_decryption = key.decrypt(file_text)
ciphertext_matrix = TrigramSpace(alphabet)
ciphertext_matrix.learn(current_decryption)

# compute initial difference & deep copy the key
current_bigram_difference = base_matrix.compare_to(ciphertext_matrix)
def create_guess(self, file_name):
    """Build an initial random guess from the unique characters in *file_name*.

    A MonogramMatrix is constructed over the file's characters; the resulting
    Guess starts from one randomly assigned character mapping.
    """
    mono_matrix = MonogramMatrix(file_name)
    initial = Guess(mono_matrix.getListOfUniqueCharacters())
    initial.randomGuessOneCharacter()
    return initial.get()
from guess import Guess
from monogrammatrix import MonogramMatrix
from accuracyofdecryption import AccuracyOfDecryption

# Learn English monogram frequencies from the word-list corpus, with space
# pinned as the most common symbol.
mono = MonogramMatrix()
mono.learn_from_file('training_corpora/english_word_list.txt')
mono.setCharacterAsMostCommon(' ')

# Start a guess from the corpus's unique characters, assigning one at random.
newGuess = Guess(mono.getListOfUniqueCharacters())
newGuess.randomGuessOneCharacter()
#monoMapping = mono.generateMappingBasedOnFrequencies('text_pairs/1.ciphertext.txt')
#newGuess.setGuess(monoMapping)
#print(monoMapping)

# Count how many words of a sample string appear in the English word list.
accOfDec = AccuracyOfDecryption('training_corpora/english_word_list.txt')
accurateResultsCount = accOfDec.getCountOfAccurateWords('hi my name is eric shady\ncats\tare what? I taught eminem')
print(accurateResultsCount)
#print(monoMatrixOfLanguageFrequencies.get())
#can do a bucket for dictionary?