def getNgramScore(text, n=4):
    '''Score how closely ``text`` resembles English using n-gram statistics.

    ``n`` selects the table: 2, 3, 4 or 5 use the bigram, trigram, quadgram
    or quingram scorer respectively. Any other value returns the mean of
    those four scores. Each scorer is built from a file under
    ``<current working directory>/data/`` the first time it is needed and is
    then kept in a module-level global. ``text`` is upper-cased before
    scoring.
    '''
    global biGramScore, triGramScore, quadGramScore, quinGramScore

    if n == 2:
        if biGramScore is None:
            biGramScore = ngram_score.ngram_score(
                os.getcwd() + '/data/english_bigrams.txt')
        return biGramScore.score(text.upper())

    if n == 3:
        if triGramScore is None:
            triGramScore = ngram_score.ngram_score(
                os.getcwd() + '/data/english_trigrams.txt')
        return triGramScore.score(text.upper())

    if n == 4:
        if quadGramScore is None:
            quadGramScore = ngram_score.ngram_score(
                os.getcwd() + '/data/english_quadgrams.txt')
        return quadGramScore.score(text.upper())

    if n == 5:
        if quinGramScore is None:
            quinGramScore = ngram_score.ngram_score(
                os.getcwd() + '/data/english_quingrams.txt')
        return quinGramScore.score(text.upper())

    # Any other n: average the four individual scores.
    biScore = getNgramScore(text, 2)
    triScore = getNgramScore(text, 3)
    quadScore = getNgramScore(text, 4)
    quinScore = getNgramScore(text, 5)
    return (biScore + triScore + quadScore + quinScore) / 4
def getNgramScore(text, n=4):
    '''Score how closely ``text`` resembles English using n-gram statistics.

    ``n`` selects the table: 2, 3, 4 or 5 use the bigram, trigram, quadgram
    or quingram scorer respectively. Any other value returns the mean of
    those four scores. Each scorer is built from a file under
    ``<current working directory>/data/`` the first time it is needed and is
    then kept in a module-level global. ``text`` is upper-cased before
    scoring.
    '''
    global biGramScore, triGramScore, quadGramScore, quinGramScore

    if n == 2:
        if biGramScore is None:
            biGramScore = ngram_score.ngram_score(
                os.getcwd() + '/data/english_bigrams.txt')
        return biGramScore.score(text.upper())

    if n == 3:
        if triGramScore is None:
            triGramScore = ngram_score.ngram_score(
                os.getcwd() + '/data/english_trigrams.txt')
        return triGramScore.score(text.upper())

    if n == 4:
        if quadGramScore is None:
            quadGramScore = ngram_score.ngram_score(
                os.getcwd() + '/data/english_quadgrams.txt')
        return quadGramScore.score(text.upper())

    if n == 5:
        if quinGramScore is None:
            quinGramScore = ngram_score.ngram_score(
                os.getcwd() + '/data/english_quingrams.txt')
        return quinGramScore.score(text.upper())

    # Any other n: average the four individual scores.
    biScore = getNgramScore(text, 2)
    triScore = getNgramScore(text, 3)
    quadScore = getNgramScore(text, 4)
    quinScore = getNgramScore(text, 5)
    return (biScore + triScore + quadScore + quinScore) / 4
def AutokeyCracker(ctext, q1gram, t1gram):
    """Search Autokey keys of length 3 to 19 and print the best one per length.

    ``q1gram`` and ``t1gram`` are handed to ``ngram_score`` to build the two
    scorers used below. ``ctext`` is upper-cased and everything outside A-Z
    is removed before the search. For each key length the key is grown one
    letter at a time, keeping the 100 best partial keys at each step.

    Nothing is returned; one summary line is printed per key length.
    """
    qgram = ngram_score(q1gram)
    trigram = ngram_score(t1gram)
    ctext = re.sub(r'[^A-Z]', '', ctext.upper())
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    class nbest(object):
        """Keep only the N highest-sorting items that have been added."""

        def __init__(self, N=1000):
            self.store = []
            self.N = N

        def add(self, item):
            self.store.append(item)
            self.store.sort(reverse=True)
            self.store = self.store[:self.N]

        def __getitem__(self, k):
            return self.store[k]

        def __len__(self):
            return len(self.store)

    N = 100
    for KLEN in range(3, 20):
        # Seed the search with every ordered choice of 3 distinct letters,
        # padded with 'A' up to the key length under test.
        rec = nbest(N)
        for i in permutations(alphabet, 3):
            prefix = ''.join(i)
            key = prefix + 'A' * (KLEN - len(i))
            pt = Autokey(key).decipher(ctext)
            score = 0
            for j in range(0, len(ctext), KLEN):
                score += trigram.score(pt[j:j + 3])
            rec.add((score, prefix, pt[:30]))

        # Extend each of the N best partial keys by one letter per round.
        next_rec = nbest(N)
        for i in range(0, KLEN - 3):
            # range (not xrange) so the loop runs on Python 3 as well as 2.
            for k in range(N):
                for c in alphabet:
                    key = rec[k][1] + c
                    fullkey = key + 'A' * (KLEN - len(key))
                    pt = Autokey(fullkey).decipher(ctext)
                    score = 0
                    for j in range(0, len(ctext), KLEN):
                        score += qgram.score(pt[j:j + len(key)])
                    next_rec.add((score, key, pt[:30]))
            rec = next_rec
            next_rec = nbest(N)

        # Re-rank the survivors by the score of the whole decryption.
        bestkey = rec[0][1]
        pt = Autokey(bestkey).decipher(ctext)
        bestscore = qgram.score(pt)
        for i in range(N):
            pt = Autokey(rec[i][1]).decipher(ctext)
            score = qgram.score(pt)
            if score > bestscore:
                bestkey = rec[i][1]
                bestscore = score
        print('\033[1;34m[*]\033[0m Score = ' + str(bestscore) +
              ' ; Iteration = ' + str(KLEN) + ' ; Key = ' + bestkey +
              ' ; Out = ' + Autokey(bestkey).decipher(ctext))
def crack(ciphertext):
    """Recover a repeating key and the plaintext for ``ciphertext``.

    Step 1 ranks key lengths 2..39 by the mean, over three pairs of adjacent
    key-length-sized chunks, of ``hamming_distance`` divided by the length.
    Step 2 takes the five best-ranked lengths, splits the ciphertext into
    one block per key position, solves each block with
    ``xor.byte_xor_crack`` and scores the resulting plaintext with the
    monogram table from 'english_monograms.txt'.

    Returns ``(key, plaintext)`` for the best-scoring candidate; both are
    empty strings if no candidate scores above ``-inf``.
    """
    correct_key = ""
    correct_plaintext = ""
    best_score = -float('inf')
    fitness = ns.ngram_score('english_monograms.txt')

    # 1. Guess key length
    distances = []
    for size in range(2, 40):
        # Divide by float(size): on Python 2 an int/int division would
        # truncate the normalised distance before it is averaged.
        d1 = hamming_distance(ciphertext[0:size],
                              ciphertext[size:2 * size]) / float(size)
        d2 = hamming_distance(ciphertext[2 * size:3 * size],
                              ciphertext[3 * size:4 * size]) / float(size)
        d3 = hamming_distance(ciphertext[4 * size:5 * size],
                              ciphertext[5 * size:6 * size]) / float(size)
        distances.append((size, (d1 + d2 + d3) / 3))
    distances = sorted(distances, key=lambda tup: tup[1])

    # 2. Break ciphertext into blocks
    for key_len, _ in distances[:5]:
        # Block k holds every character whose index is k modulo key_len.
        text_blocks = [ciphertext[offset::key_len]
                       for offset in range(key_len)]
        # Solve each block separately; element [0] of the result is the key.
        key = "".join(xor.byte_xor_crack(block)[0] for block in text_blocks)
        # NOTE(review): encrypt() is used to decrypt here, which presumably
        # relies on the cipher being its own inverse -- confirm.
        plaintext = encrypt(ciphertext, key)
        score = fitness.score(plaintext)
        if score > best_score:
            best_score = score
            correct_plaintext = plaintext
            correct_key = key
    return (correct_key, correct_plaintext)
def AffineCracker(ctext, ngram):
    """Print the best affine key found for ``ctext`` and its decryption.

    ``ngram`` is passed to ``ngram_score`` to build the scorer given to
    ``break_affine``. Element 1 of that function's result is used as the
    ``(a, b)`` key pair. Nothing is returned.
    """
    scorer = ngram_score(ngram)
    best = break_affine(ctext, scorer)
    print('\033[1;32m[+]\033[0m Best candidate with key (a,b) = ' +
          str(best[1]) + ':')
    cipher = Affine(best[1][0], best[1][1])
    print(cipher.decipher(ctext))
def test_substitution_cipher(text, max_iterations):
    """Hill-climb a simple-substitution key for ``text``.

    The text is upper-cased and reduced to A-Z. Each of the
    ``max_iterations`` restarts shuffles the working key and then keeps
    swapping two random positions, accepting a swap only when the quadgram
    score improves; a restart ends after 1000 consecutive rejected swaps.

    Returns ``(best_score, best_key, plaintext)`` where ``best_key`` is a
    list of 26 letters.
    """
    scorer = ngram_score('english_quadgrams.txt')  # quadgram statistics
    ctext = re.sub('[^A-Z]', '', text.upper())

    best_key = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    best_score = -99e9
    current_key = best_key[:]

    for _ in range(0, max_iterations):
        random.shuffle(current_key)
        current_score = scorer.score(SimpleSub(current_key).decipher(ctext))

        stale = 0
        while stale < 1000:
            first = random.randint(0, 25)
            second = random.randint(0, 25)
            candidate = current_key[:]
            # swap two characters in the candidate key
            candidate[first], candidate[second] = \
                candidate[second], candidate[first]
            candidate_score = scorer.score(
                SimpleSub(candidate).decipher(ctext))
            if candidate_score > current_score:
                # accepted: the counter restarts (0, then the +1 below)
                current_score = candidate_score
                current_key = candidate
                stale = 0
            stale += 1

        # remember the best restart seen so far
        if current_score > best_score:
            best_score = current_score
            best_key = current_key[:]

    return best_score, best_key, SimpleSub(best_key).decipher(ctext)
def autoDecryptTask(PipeIn, ciphertextContents):
    """Hill-climb a substitution key and send the best key through ``PipeIn``.

    Runs 1000 restarts. Each restart shuffles the working key and keeps
    swapping two random positions, accepting a swap only when the quadgram
    score of ``substitute(ciphertextContents, key)`` improves; a restart
    ends after 1000 consecutive rejected swaps.

    NOTE(review): the original layout was lost; the single ``PipeIn.send``
    is assumed to run once, after all restarts -- confirm against the
    caller.
    """
    scorer = ngram_score('english_quadgrams.txt')  # quadgram statistics

    best_key = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    best_score = -99e9
    current_key = best_key[:]

    for _ in range(1000):
        random.shuffle(current_key)
        current_score = scorer.score(
            substitute(ciphertextContents, current_key))

        stale = 0
        while stale < 1000:
            first = random.randint(0, 25)
            second = random.randint(0, 25)
            candidate = current_key[:]
            # swap two characters in the candidate key
            candidate[first], candidate[second] = \
                candidate[second], candidate[first]
            candidate_score = scorer.score(
                substitute(ciphertextContents, candidate))
            if candidate_score > current_score:
                current_score = candidate_score
                current_key = candidate
                stale = 0
            stale += 1

        # remember the best restart seen so far
        if current_score > best_score:
            best_score = current_score
            best_key = current_key[:]

    PipeIn.send(best_key)
def decryptUsingQuadgramLocalSearch(inputText, keyLength): inputText = "".join(inputText.lower().split()) key = initializeRandomKey(keyLength) print "INITIAL KEY: ", key ngram = ngram_score("english_quadgrams.txt") fitness = ngram.score(decrypt(inputText, key)) print "INITIAL FITNESS: ", fitness improvement = True indexOfKeyToModify = 0 while improvement == True: bestFitness = float("-inf") bestKey = "" childrenKeys = computeChildren(key, indexOfKeyToModify) indexOfKeyToModify = (indexOfKeyToModify + 1) % keyLength for childKey in childrenKeys: childScore = ngram.score(decrypt(inputText, childKey)) if childScore > bestFitness: bestFitness = childScore bestKey = childKey if bestFitness <= fitness: improvement = False else: fitness = bestFitness key = bestKey print fitness, key print key
def caesar_ngram(input_file, output_file):
    """
    Break a Caesar cipher with quadgram statistics.

    The whole of ``input_file`` is read. A copy reduced to upper-case A-Z is
    passed through ``caesar_cipher`` with each of the 26 shifts and scored
    against the table in 'quadgrams.txt'. The highest-scoring shift is
    printed and then applied to the original, unfiltered text, which is
    written to ``output_file``.

    Sources from:
    http://practicalcryptography.com/cryptanalysis/text-characterisation/quadgrams/#a-python-implementation
    https://github.com/jameslyons/python_cryptanalysis/blob/master/break_caesar.py
    """
    original_text = input_file.read()
    fitness = ngram_score('quadgrams.txt')  # load our quadgram statistics

    # score only the letters: spacing/punctuation removed, upper-cased
    letters_only = re.sub('[^A-Z]', '', original_text.upper())

    # (score, shift) pairs; max() therefore picks the highest score
    candidates = [(fitness.score(caesar_cipher(letters_only, shift)), shift)
                  for shift in range(26)]
    best = max(candidates)

    print("The most possible key = " + str(best[1]))
    print("Check out your output file.")
    output_file.write(caesar_cipher(original_text, int(best[1])))
def VigenereCracker(ctext, q1gram, t1gram):
    """Search Vigenere keys of length 3 to 19 and print the best one per length.

    ``q1gram`` and ``t1gram`` are handed to ``ngram_score`` to build the two
    scorers used below. ``ctext`` is upper-cased and everything outside A-Z
    is removed before the search. For each key length the key is grown one
    letter at a time, keeping the 100 best partial keys (via ``nbest``) at
    each step.

    Nothing is returned; one summary line is printed per key length.
    """
    qgram = ngram_score(q1gram)
    trigram = ngram_score(t1gram)
    ctext = re.sub(r'[^A-Z]', '', ctext.upper())
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    N = 100
    for KLEN in range(3, 20):
        # Seed the search with every ordered choice of 3 distinct letters,
        # padded with 'A' up to the key length under test.
        rec = nbest(N)
        for i in permutations(alphabet, 3):
            prefix = ''.join(i)
            key = prefix + 'A' * (KLEN - len(i))
            pt = Vigenere(key).decipher(ctext)
            score = 0
            for j in range(0, len(ctext), KLEN):
                score += trigram.score(pt[j:j + 3])
            rec.add((score, prefix, pt[:30]))

        # Extend each of the N best partial keys by one letter per round.
        next_rec = nbest(N)
        for i in range(0, KLEN - 3):
            # range (not xrange) so the loop runs on Python 3 as well as 2.
            for k in range(N):
                for c in alphabet:
                    key = rec[k][1] + c
                    fullkey = key + 'A' * (KLEN - len(key))
                    pt = Vigenere(fullkey).decipher(ctext)
                    score = 0
                    for j in range(0, len(ctext), KLEN):
                        score += qgram.score(pt[j:j + len(key)])
                    next_rec.add((score, key, pt[:30]))
            rec = next_rec
            next_rec = nbest(N)

        # Re-rank the survivors by the score of the whole decryption.
        bestkey = rec[0][1]
        pt = Vigenere(bestkey).decipher(ctext)
        bestscore = qgram.score(pt)
        for i in range(N):
            pt = Vigenere(rec[i][1]).decipher(ctext)
            score = qgram.score(pt)
            if score > bestscore:
                bestkey = rec[i][1]
                bestscore = score
        print('\033[1;34m[*]\033[0m Score = ' + str(bestscore) +
              ' ; Iteraction = ' + str(KLEN) + ' ; Key = ' + str(bestkey) +
              ' ; Out = ' + Vigenere(bestkey).decipher(ctext))
def byte_xor_detect(ciphertexts):
    """Find the entry whose best single-byte-XOR decryption scores highest.

    Every entry of ``ciphertexts`` is passed to ``byte_xor_crack`` and the
    plaintext it returns is scored with the monogram table loaded from
    'english_monograms.txt'.

    Returns:
        ``(index, key, plaintext)`` for the best-scoring entry. On equal
        scores the earliest entry wins.

    Raises:
        ValueError: if ``ciphertexts`` is empty (previously this surfaced as
            an UnboundLocalError at the return statement).
    """
    if len(ciphertexts) == 0:
        raise ValueError('ciphertexts must not be empty')

    fitness = ns.ngram_score('english_monograms.txt')
    max_score = -float('inf')
    n = None
    real_key = None
    real_plaintext = None
    for i, ciphertext in enumerate(ciphertexts):
        key, plaintext = byte_xor_crack(ciphertext)
        score = fitness.score(plaintext)
        # "n is None" guarantees a result even if no score exceeds -inf.
        if n is None or score > max_score:
            max_score = score
            n = i
            real_plaintext = plaintext
            real_key = key
    return (n, real_key, real_plaintext)
def byte_xor_crack(ciphertext):
    """Try all 256 single-character keys and keep the best-scoring result.

    For each code 0..255 the character is repeated to the length of
    ``ciphertext``, combined with it through ``xor`` and the output scored
    with the monogram table loaded from 'english_monograms.txt'.

    Returns:
        ``(key, plaintext)`` where ``key`` is the single winning character.
        On equal scores the lowest code wins.
    """
    fitness = ns.ngram_score('english_monograms.txt')
    max_score = -float('inf')
    key = None
    plaintext = None
    for i in range(0x00, 0x100):
        candidate = chr(i)
        text = xor(ciphertext, candidate * len(ciphertext))
        score = fitness.score(text)
        # "key is None" guarantees a result even if no score exceeds -inf.
        if key is None or score > max_score:
            max_score = score
            # Use the character itself rather than indexing the repeated
            # string, which is empty (IndexError) for an empty ciphertext.
            key = candidate
            plaintext = text
    return (key, plaintext)
def autobreak(msg):
    """Greedily search for the best key of each length from 1 to 10.

    For every key length the key is tuned one position at a time: all 26
    upper-case letters are tried at that position and the letter giving the
    highest ``fitness.score(crypt(msg, key, True))`` is kept. The key found
    for one length, extended with an 'A', is the starting point for the
    next length.

    NOTE(review): the original assigned into a ``str`` (``key[j] = ...``,
    a TypeError), never updated its best score and returned nothing, so
    the behaviour above is a reconstruction of the apparent intent.
    Presumably ``crypt(msg, key, True)`` decrypts ``msg`` -- confirm.

    Returns:
        A list of 10 key strings; element ``i`` is the key of length
        ``i + 1``.
    """
    import ngram_score as ns
    fitness = ns.ngram_score()

    liste_keys = []
    key = ["A"]  # a list, because individual positions are reassigned
    for _ in range(10):
        best_score = float("-inf")
        for j in range(len(key)):
            best_letter = key[j]
            for k in range(65, 91):  # code points of 'A'..'Z'
                key[j] = chr(k)
                score = fitness.score(crypt(msg, "".join(key), True))
                if score > best_score:
                    best_score = score
                    best_letter = key[j]
            # keep the winner at this position before moving on
            key[j] = best_letter
        liste_keys.append("".join(key))
        key.append("A")
    return liste_keys
def break_caesar(ctext):
    """
    Break a Caesar cipher by English frequency detection; because it relies
    on quadgram statistics it will not do well on short ciphertexts.

    The text is upper-cased and reduced to A-Z, deciphered with each of the
    26 keys and scored against './English_Frequency/quadgram.pickle'. The
    best key and its decryption are printed.

    Returns:
        The ``(score, key)`` tuple of the best-scoring key.
    """
    from ngram_score import ngram_score
    # load our quadgram statistics
    fitness = ngram_score('./English_Frequency/quadgram.pickle')

    # make sure ciphertext has all spacing/punc removed and is uppercase
    ctext = re.sub('[^A-Z]', '', ctext.upper())

    # try all possible keys, return the one with the highest fitness
    scores = [(fitness.score(Caesar(i).decipher(ctext)), i)
              for i in range(26)]

    # Compute the winner before printing: the original referenced max_key
    # without ever assigning it, which raised NameError.
    max_key = max(scores)
    print('best candidate with key (a,b) = {}:'.format(str(max_key[1])))
    print(Caesar(max_key[1]).decipher(ctext))
    return max_key
def word_decrypt_sub(word):
    """Hill-climb a simple-substitution key for ``word`` until interrupted.

    ``word`` is upper-cased and reduced to A-Z. The function loops forever:
    each restart shuffles the working key and keeps swapping two random
    positions, accepting a swap only when the quadgram score improves. A
    restart ends after 1000 consecutive rejected swaps. Whenever a restart
    beats the best score seen so far, the score, key and plaintext are
    printed. The function never returns.
    """
    fitness = ngram_score('english_quadgrams.txt')  # quadgram statistics
    ctext = re.sub('[^A-Z]', '', word.upper())

    maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    maxscore = -99e9  # the first generated key always replaces this
    current_key = maxkey[:]

    # keep going until we are killed by the user
    i = 0
    while True:
        i += 1
        random.shuffle(current_key)
        # SimpleSub deciphers with the given 26-letter key
        current_score = fitness.score(SimpleSub(current_key).decipher(ctext))

        # give up on this start after 1000 swaps without improvement
        stale = 0
        while stale < 1000:
            first = random.randint(0, 25)
            second = random.randint(0, 25)
            candidate = current_key[:]
            # swap two characters in the candidate key
            candidate[first], candidate[second] = \
                candidate[second], candidate[first]
            candidate_score = fitness.score(
                SimpleSub(candidate).decipher(ctext))
            if candidate_score > current_score:
                current_score = candidate_score
                current_key = candidate
                stale = 0
            stale += 1

        # nothing to report unless this restart set a new record
        if not current_score > maxscore:
            continue
        maxscore, maxkey = current_score, current_key[:]
        print ('\nbest score so far:',maxscore,'on iteration',i)
        ss = SimpleSub(maxkey)
        print (' best key: ' + ''.join(maxkey))
        print (' plaintext: ' + ss.decipher(ctext))
def resolver(ctext):
    """Recover a Vigenere key for ``ctext`` by per-position hill climbing.

    The text is upper-cased and reduced to A-Z. The key length comes from
    ``ic_calculate`` and the search starts from an all-'A' key. Each pass
    tries every letter at every key position and keeps any change that
    raises the quadgram score. Passes repeat until the deciphered text is
    the same as after the previous pass.

    Returns a dict with the entries ``key`` (string) and ``text``
    (plaintext).
    """
    cleaned = re.sub(r'[^A-Z]', '', ctext.upper())
    scorer = ngram_score('quadgrams.txt')  # load quadgram statistics
    key_length = ic_calculate(cleaned)

    current_key = list('A' * key_length)
    best_seen = -99e9
    current_score = scorer.score(Vigenere(current_key).decipher(cleaned))

    previous_text = ""
    while True:
        for position in range(0, key_length):
            for letter in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
                trial = current_key[:]
                trial[position] = letter
                trial_score = scorer.score(Vigenere(trial).decipher(cleaned))
                if trial_score > current_score:
                    current_score = trial_score
                    current_key = trial

        # the plaintext is refreshed only when the pass improved the score
        if current_score > best_seen:
            best_seen = current_score
            latest_text = Vigenere(current_key).decipher(cleaned)

        if latest_text == previous_text:
            break
        previous_text = latest_text

    return dict(key=''.join(current_key), text=previous_text)
def main():
    """Decode the substitution-cipher file and report the elapsed time.

    Builds a scorer from '../../tests/quadgrams.txt', loads the cipher text
    from ``subCipher_dir`` through ``SubCipher.loadfile``, prints the result
    of ``decodeAlgo`` and finally prints the run time rounded to whole
    seconds.
    """
    started_at = time.perf_counter()

    # quadgrams.txt is read from the tests directory
    stats_file = os.path.join("../../tests/", "quadgrams.txt")

    # ngram_score lives outside the package, so its folder is added to
    # sys.path before importing it
    sys.path.insert(1, "../ngramcode/")
    from ngram_score import ngram_score

    scorer = ngram_score(stats_file)  # load our quadgram stats
    solver = SubCipher()
    cipher_text = solver.loadfile(subCipher_dir)
    print(solver.decodeAlgo(cipher_text, scorer))

    elapsed = time.perf_counter() - started_at
    print("Program finished in {} second(s).".format(round(elapsed)))
def decryptQuadgram(input, longitudClave):
    """Local search for a key of length ``longitudClave`` for ``input``.

    Starts from a random upper-case key. Each round asks ``computeChildren``
    for variants of the key at one position (the position advances
    cyclically) and moves to the best-scoring variant. The search stops as
    soon as the best variant fails to beat the current fitness by more
    than 1. Progress, the final key and the decrypted message are printed;
    nothing is returned.

    NOTE(review): despite the function name, the statistics file loaded is
    'spanish_monograms.txt'. The original layout was lost; the per-round
    progress print is assumed to run only after an improvement -- confirm.
    """
    key = "".join(chr(65 + randint(0, 25)) for _ in range(longitudClave))
    print("Clave aleatoria inicial: ", key)

    scorer = ngram_score(
        "/home/diego/PycharmProjects/Seguridad/ejercicio9-vigenere/spanish_monograms.txt"
    )
    fitness = scorer.score(decrypt(input, key))
    print("Fitness inicial: ", fitness)

    position = 0
    while True:
        # Generate the variants of the key for the current position
        candidates = computeChildren(key, position)
        position = (position + 1) % longitudClave

        top_score = float("-inf")
        top_key = ""
        for candidate in candidates:
            candidate_score = scorer.score(decrypt(input, candidate))
            if candidate_score > top_score:
                top_score = candidate_score
                top_key = candidate

        if top_score - 1 <= fitness:
            break
        fitness = top_score
        key = top_key
        print("Clave: {} Fitness: {}".format(key, fitness))

    print("Clave definitiva: {}".format(key))
    print("Mensaje: {}".format(decrypt(input, key)))
def SubsCracker(ctext, ngram):
    """Hill-climb a simple-substitution key for ``ctext`` until interrupted.

    ``ngram`` is passed to ``ngram_score`` to build the scorer. The text is
    upper-cased and reduced to A-Z. The function loops forever: each restart
    shuffles the working key and keeps swapping two random positions,
    accepting a swap only when the score improves. A restart ends after
    1000 consecutive rejected swaps. Whenever a restart beats the best score
    seen so far, the score, key and plaintext are printed. The function
    never returns.
    """
    fitness = ngram_score(ngram)
    ctext = re.sub('[^A-Z]', '', ctext.upper())

    maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    maxscore = -99e9
    current_key = maxkey[:]

    i = 0
    print(
        '\033[1;34m[*]\033[0m Cracking the cipher. This might take a while...')
    while True:
        i += 1
        random.shuffle(current_key)
        current_score = fitness.score(SimpleSub(current_key).decipher(ctext))

        stale = 0
        while stale < 1000:
            first = random.randint(0, 25)
            second = random.randint(0, 25)
            candidate = current_key[:]
            # swap two characters in the candidate key
            candidate[first], candidate[second] = \
                candidate[second], candidate[first]
            candidate_score = fitness.score(
                SimpleSub(candidate).decipher(ctext))
            if candidate_score > current_score:
                current_score = candidate_score
                current_key = candidate
                stale = 0
            stale += 1

        # nothing to report unless this restart set a new record
        if not current_score > maxscore:
            continue
        maxscore, maxkey = current_score, current_key[:]
        print('\n\033[1;34m[*]\033[0m Best score so far: ' + str(maxscore) +
              ' on iteration ' + str(i))
        ss = SimpleSub(maxkey)
        print(' \033[1;32m[+]\033[0m Best key: ' + ''.join(maxkey))
        print(' \033[1;32m[+]\033[0m Plaintext: ' + ss.decipher(ctext))
def main():
    """Decode the substitution-cipher file and report the elapsed time.

    Builds a scorer from '../../tests/quadgrams.txt', creates a
    ``SubCipher`` that is handed a ``multiprocessing.Queue``, loads the
    cipher text from ``subCipher_dir``, prints the result of ``decodeAlgo``
    and finally prints the run time rounded to whole seconds.
    """
    started_at = time.perf_counter()

    # quadgrams.txt is read from the tests directory
    stats_file = os.path.join("../../tests/", "quadgrams.txt")

    # ngram_score lives outside the package, so its folder is added to
    # sys.path before importing it
    sys.path.insert(1, "../ngramcode/")
    from ngram_score import ngram_score

    scorer = ngram_score(stats_file)  # load our quadgram stats

    queue = multiprocessing.Queue()
    solver = SubCipher(queue)  # the SubCipher instance receives the queue
    # cipher text file from the Substitution Cipher directory
    cipher_text = solver.loadfile(subCipher_dir)
    print(solver.decodeAlgo(cipher_text, scorer))

    # output info on total execution time
    elapsed = time.perf_counter() - started_at
    print("Program finished in {} second(s).".format(round(elapsed)))
# -*- coding: utf-8 -*-
"""
Solution to substitution cipher by Pieter and Ole

Created on Wed Jul 20 19:14:20 2016

@author: ole
"""
import sys
import ngram_score as ns
import string
from random import randint

# Trigram scorer shared by every fitness evaluation in this module.
scorer = ns.ngram_score('english_trigrams.txt')


def compute_fitness(text):
    """Return the trigram score of ``text``."""
    return scorer.score(text)


ALPHABET = list(string.ascii_uppercase)


def make_table(key):
    """Map each letter of ALPHABET to the item of ``key`` at the same index."""
    return dict(zip(ALPHABET, key))


def decipher(text, table):
    """Translate ``text`` through ``table``.

    Characters that are not keys of ``table`` are copied unchanged. The
    result is assembled with a single join instead of repeated string
    concatenation.
    """
    return "".join(table.get(c, c) for c in text)
import sys sys.path.append('/home/seb/tools/crypto/ngram_score') sys.path.append('/home/seb/tools/crypto/pycipher') from ngram_score import ngram_score fitness = ngram_score('/home/seb/tools/crypto/practicalcryptography.com/quadgrams.txt') import morse_talk #data = '-.-.-..-..--.--..-..---.-.--.--.' data = '-.-. - ..-. .-- .--.'.replace(' ', '') #data = '-.-' #.-..-..--.--..-..---.-.--.--.' #data = 'ABCD' ''' A B C A BC AB C ''' ''' result = A BCD A B CD A B C D A BC D AB CD AB C D ABC D ABCD '''
def decrypt(ctxt, key):
    """Shift every character of ``ctxt`` back by ``key`` places within A-Z.

    Each character is mapped by its offset from 'A' modulo 26, so the
    result is only meaningful for upper-case A-Z input.
    """
    base = ord('A')
    return "".join(chr((ord(c) - base - key) % 26 + base) for c in ctxt)


# Only the first line of the file is treated as ciphertext. The context
# manager closes the file even if reading fails.
with open("caesar_ciphertext.txt", "r") as fi:
    ciphertext = fi.readline().strip()

ns = ngram_score("english_quadgrams.txt")

# candidate plaintext -> quadgram score, for all 26 shifts
texts = dict()
for k in range(0, 26):
    plaintext = decrypt(ciphertext, k)
    scr = ns.score(plaintext)  # scored once and reused for the printout
    texts[plaintext] = scr
    print(plaintext + " " + str(scr))

result = max(texts, key=texts.get)
print("Result: " + result)
import sys import random import ngram_score as ns fitness = ns.ngram_score('english_quadgrams.txt') englishAlphabet = [' ', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'] frequencyAlphabet = ['E', 'T', 'A', 'O', 'I', 'N', 'S', 'R', 'H', 'L', 'D', 'C', 'U', 'M', 'F', 'P', 'G', 'W', 'Y', 'B', 'V', 'K', 'X', 'J', 'Q', 'Z'] # Returns the text with all characters in upper case def getUpperCaseText(tokens): folded = tokens.upper() return folded # Generates the key as a list of tuples def generateTuppleList(keyList, alphabetList): tuppledKey = zip(keyList, alphabetList) return tuppledKey # Encrypts a plaintext using the given key def encryptText(key, plainText): textList = getUpperCaseText(plainText) cipherText = [] for char in textList: for charTupple in key: if char == charTupple[1]: cipherText.extend(charTupple[0]) return cipherText # Sorts characters from highest occurence to lowest and removes duplicate characters def sortOccurence(text): sortedOccurList = []
import numpy as np import re import matplotlib.pyplot as plt import ngram_score as ns import itertools frequency_alphabet = 'etaoinshrdlcumwfgypbvkjxqz' plain_alphabet = 'abcdefghijklmnopqrstuvwxyz' plain_alphabet_cap = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' trigram_fitness = ns.ngram_score(ns.trigrams) quadgram_fitness = ns.ngram_score(ns.quadgrams) def scoreOnTrigrams(text, sample_length=24, times=10): text_cap = text.upper() max_pos = len(text) - sample_length - 1 acc = 0 for i in range(times): pos = np.random.randint(0, max_pos) acc += trigram_fitness.score(text_cap[pos:pos+sample_length]) return acc / times def scoreOnQuadgrams(text, sample_length=24, times=10): text_cap = text.upper() max_pos = len(text) - sample_length - 1 acc = 0 for i in range(times):
Algorithm: 1. Use a random key and set this as the parent key Do this for 1000 iterations: 2. Store the fitness score (score based on occurrences of Quadgrams) of the deciphered text using key 3. Swap 2 characters in the parent key and set this as child. If fitness score calculated using the child is greater than parent then set the parent as child key 4. Print current best key and best plaintext if fitness score is higher than from the earlier iterations and move back to step 1 @author: anirudhravi """ from pycipher import SimpleSubstitution as SimpleSub import time import re import random from ngram_score import ngram_score fitness = ngram_score("quadgrams.txt") # Import number of times Quadgrams occur in "War and Peace" by Leo Tolstoy ctext = "tpfccdlfdttepcaccplircdtdklpcfrp?qeiqlhpqlipqeodfgpwafopwprtiizxndkiqpkiikrirrifcapncdxkdciqcafmdvkfpcadf" ctext = re.sub("[^A-Z]", "", ctext.upper()) # Remove spaces and make all characters Upper Case maxkey = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") maxscore = -99e9 parentscore, parentkey = maxscore, maxkey[:] print "Cryptanalysis of Monoalphabetic Cipher. Several Iterations are run to get the correct result." print "Enter CTRL+C to exit the program" # Keep going until the program is killed start_time = time.time() i = 0 while 1: i = i + 1 random.shuffle(parentkey)
self.store.sort(reverse=True) self.store = self.store[:self.N] def __getitem__(self,k): return self.store[k] def __len__(self): return len(self.store) import re # this is the second feynman cipher, no known decryption ctext ='XUKEXWSLZJUAXUNKIGWFSOZRAWURORKXAOSLHROBXBTKCMUWDVPTFBLMKEFVWMUXTVTWUIDDJVZKBRMCWOIWYDXMLUFPVSHAGSVWUFWORCWUIDUJCNVTTBERTUNOJUZHVTWKORSVRZSVVFSQXOCMUWPYTRLGBMCYPOJCLRIYTVFCCMUWUFPOXCNMCIWMSKPXEDLYIQKDJWIWCJUMVRCJUMVRKXWURKPSEEIWZVXULEIOETOOFWKBIUXPXUGOWLFPWUSCH' ctext = re.sub('[^A-Z]','',ctext.upper()) mono = ngram_score('monograms.txt') bi = ngram_score('bigrams.txt') quad = ngram_score('quadgrams.txt') N = 20 rec = nbest(N) for seq in product(range(26),repeat=3): if seq[0]%2 == 0 and seq[1]%2 == 0 and seq[2]%2 == 0: continue if seq[0]%13 == 0 and seq[1]%13 == 0 and seq[2]%13 == 0: continue seq2 = (seq[0],seq[1],seq[2],1,1,1,1,1,1) txt = hill3decipher(ctext,seq2) score = 0 for i in range(0,len(txt),3): score += mono.score(txt[i])
Algorithm: 1. Use a random key and set this as the parent key Do this for 1000 iterations: 2. Store the fitness score (score based on occurrences of Quadgrams) of the deciphered text using key 3. Swap 2 characters in the parent key and set this as child. If fitness score calculated using the child is greater than parent then set the parent as child key 4. Print current best key and best plaintext if fitness score is higher than from the earlier iterations and move back to step 1 @author: anirudhravi ''' from pycipher import SimpleSubstitution as SimpleSub import time import re import random from ngram_score import ngram_score fitness = ngram_score( 'quadgrams.txt' ) # Import number of times Quadgrams occur in "War and Peace" by Leo Tolstoy ctext = 'tpfccdlfdttepcaccplircdtdklpcfrp?qeiqlhpqlipqeodfgpwafopwprtiizxndkiqpkiikrirrifcapncdxkdciqcafmdvkfpcadf' ctext = re.sub( '[^A-Z]', '', ctext.upper()) #Remove spaces and make all characters Upper Case maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') maxscore = -99e9 parentscore, parentkey = maxscore, maxkey[:] print "Cryptanalysis of Monoalphabetic Cipher. Several Iterations are run to get the correct result." print "Enter CTRL+C to exit the program" # Keep going until the program is killed start_time = time.time() i = 0
def decipher(str, key_list): key_dict = dict() val = ord("A") for c in key_list: key_dict[c] = chr(val) val += 1 result = "" for c in str: if c in key_dict: result += key_dict[c] return result fit = ngram_score("quadgrams.txt") c_text = open("cipher_text.txt", "r").read() out_file = open("monoalphabetic_substitution_cipher_output.txt", "w") global_best_score = -999999999 global_best_key = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") while True: local_best_key = global_best_key[:] random.shuffle(local_best_key) local_p_text = decipher(c_text, local_best_key) local_best_score = fit.score(local_p_text) loop_count = 0 while loop_count < 1000: index1 = random.randint(0, 25)
import random from ngram_score import ngram_score import re # load our quadgram model with open ('quadgrams.txt', 'r') as ngram_file: ngrams = ngram_file.readlines() fitness = ngram_score(ngrams) # helper function, converts an integer 0-25 into a character def i2a(i): return 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'[i%26] # decipher a piece of text using the substitution cipher and a certain key def sub_decipher(text,key): invkey = [i2a(key.index(i)) for i in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'] ret = '' for c in text: if c.isalpha(): ret += invkey[ord(c.upper())-ord('A')] else: ret += c return ret def break_simplesub(ctext,startkey=None): ''' perform hill-climbing with a single start. This function may have to be called many times to break a substitution cipher. ''' # make sure ciphertext has all spacing/punc removed and is uppercase ctext = re.sub('[^A-Z]','',ctext.upper()) parentkey,parentscore = startkey or list('ABCDEFGHIJKLMNOPQRSTUVWXYZ'),-99e99 if not startkey: random.shuffle(parentkey) parentscore = fitness.score(sub_decipher(ctext,parentkey)) count = 0 while count < 1000:
import os os.listdir() from pycipher import SimpleSubstitution as SimpleSub import random import re from ngram_score import ngram_score fitness = ngram_score( '/root/Documents/CTF_WECALL/quadgrams.txt') # load our quadgram statistics ctext = 'UH BDP TFYXRDBH RZV HZQ STI WPTV BDXE YH GWXPIV X TY XYLWPEEPV NPWH APFF VZIP HZQW EZFQBXZI OPH XE YIGEGUVYFIDT BDXE FXBBFP SDTFFPIRP ATE IZB BZZ DTWV ATE XB' #ctext = re.sub('[^A-Z]','',ctext.upper()) ctext.rfind() maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') maxscore = -99e9 parentscore, parentkey = maxscore, maxkey[:] print("Substitution Cipher solver, you may have to wait several iterations") print("for the correct result. Press ctrl+c to exit program.") # keep going until we are killed by the user i = 0 while 1: i = i + 1 random.shuffle(parentkey) deciphered = SimpleSub(parentkey).decipher(ctext) parentscore = fitness.score(deciphered) count = 0 while count < 1000: a = random.randint(0, 25) b = random.randint(0, 25) child = parentkey[:] # swap two characters in the child
from operator import itemgetter
from ngram_score import ngram_score


def decrypt(ctxt, key):
    """Shift every character of ``ctxt`` back by ``key`` places within A-Z.

    Each character is mapped by its offset from 'A' modulo 26, so the
    result is only meaningful for upper-case A-Z input.
    """
    base = ord('A')
    return "".join(chr((ord(c) - base - key) % 26 + base) for c in ctxt)


# Only the first line of the file is treated as ciphertext. The context
# manager closes the file even if reading fails.
with open("caesar_ciphertext.txt", "r") as fi:
    ciphertext = fi.readline().strip()

ns = ngram_score("english_quadgrams.txt")

# candidate plaintext -> quadgram score, for all 26 shifts
texts = dict()
for k in range(0, 26):
    plaintext = decrypt(ciphertext, k)
    scr = ns.score(plaintext)  # scored once and reused for the printout
    texts[plaintext] = scr
    print(plaintext + " " + str(scr))

result = max(texts, key=texts.get)
print("Result: " + result)
# usage: python break_fracmorse.py 'CIPHERTEXTMESSAGE' # ideally you'll want 200 or so characters to reliably decrypt, shorter will often work but not as reliably. import random from ngram_score import ngram_score import re import sys from pycipher import FracMorse #ctext = FracMorse('PQRSTUVWXYZABCDEFGHIJKLMNO').encipher("He has not been returned to sea because of his affection for caregivers.The waitress pointed to the lunch menu, but the oldest living ex-major leaguer had no use for it") fitness = ngram_score('fmorse_quadgrams.txt') # load our quadgram model # helper function, converts an integer 0-25 into a character def i2a(i): return 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'[i%26] # decipher a piece of text using the substitution cipher and a certain key def sub_decipher(text,key): invkey = [i2a(key.index(i)) for i in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'] ret = '' for c in text: if c.isalpha(): ret += invkey[ord(c.upper())-ord('A')] else: ret += c return ret # This code is just the simple substitution cipher cracking code, it works perfectly for fractionated morse as # long as you use fractioned morse statistics instead of english statistics. def break_simplesub(ctext,startkey=None): ''' perform hill-climbing with a single start. This function may have to be called many times to break a substitution cipher. ''' # make sure ciphertext has all spacing/punc removed and is uppercase ctext = re.sub('[^A-Z]','',ctext.upper())
import re
import sys
from ngram_score import ngram_score
fitness = ngram_score('quadgrams.txt')  # load our quadgram statistics
from pycipher import Caesar


def break_caesar(ctext):
    """Return the ``(score, key)`` tuple of the best of the 26 Caesar keys.

    The text is upper-cased and reduced to A-Z before it is deciphered with
    each key and scored against the quadgram statistics.
    """
    # make sure ciphertext has all spacing/punc removed and is uppercase
    ctext = re.sub('[^A-Z]', '', ctext.upper())
    # try all possible keys, return the one with the highest fitness
    scores = []
    for i in range(26):
        scores.append((fitness.score(Caesar(i).decipher(ctext)), i))
    return max(scores)


# ciphertext
ctext = ""
if (len(sys.argv) >= 2):
    with open(sys.argv[1], "r") as f:
        ctext = str(f.read())
    print("Cipher text: " + ctext)
else:
    print("Usage: " + sys.argv[0] + " filename")
    # Stop here: without a file there is nothing to break, and carrying on
    # would report a "best key" for an empty ciphertext.
    sys.exit(1)

max_key = break_caesar(ctext)
print('best candidate with key (a,b) = ' + str(max_key[1]) + ':')
plaintext = Caesar(max_key[1]).decipher(ctext)
print(plaintext)
#This program decrypts a message encrypted using Vignère cipher (without knowing the key) : # To decrypt a message, this program tests the decryption with all possible keys, for example if the key has a maximum # length of 5, it begins to decrypt using as key A ..... Z then AA ........ ZZ to AAAAA ........ ZZZZZ # then it selects the best decrypted sentence that looks like english # For this, the program uses : # -An english dictionary which contains english quadgrams and a Python program that calculates sentence scores according # to their similarity to English : https://github.com/jameslyons/python_cryptanalysis/blob/master/ngram_score.py # The key here is set to have a maximum size of 10 characters, this can be changed by modifying the 'for' loop at line 92 # See example file 'vignere_autobreak.txt' import ngram_score as ns import numpy as np fit=ns.ngram_score('english_quadgrams.txt') def grid(): """ prints Vignère table """ print(" ",end="") for i in range(65,91): print("%c"%i,end=" ") print("") print(" ",end="") for i in range(65,91): print("_",end=" ") print("") for i in range(65,91): print("%c |"%i,end=" ") j=i while (j<=90):
from pycipher import SimpleSubstitution as SimpleSub
import random
import re
from ngram_score import ngram_score

fitness = ngram_score('spanish_quadgrams.txt')  # load our quadgram statistics

ctext = 'pjbbfcklerfebjppjjlboumcuppelqpfezbjruoqlerdjbcuddbukulfjojprfebjbjzfrtmloupraublxpepkurtppdbjcbelfrfebkj'
# keep only the letters A-Z, uppercased
ctext = re.sub('[^A-Z]', '', ctext.upper())

maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
maxscore = -99e9
parentscore, parentkey = maxscore, maxkey[:]

# print() calls (instead of Python 2 print statements) so the script parses
# on Python 3 as well.
print("Substitution Cipher solver, you may have to wait several iterations")
print("for the correct result. Press ctrl+c to exit program.")

# keep going until we are killed by the user
i = 0
while 1:
    i = i + 1
    # start each outer iteration from a random key and score it
    random.shuffle(parentkey)
    deciphered = SimpleSub(parentkey).decipher(ctext)
    parentscore = fitness.score(deciphered)
    count = 0
    while count < 1000:
        a = random.randint(0, 25)
        b = random.randint(0, 25)
        child = parentkey[:]
        # swap two characters in the child
        child[a], child[b] = child[b], child[a]
        deciphered = SimpleSub(child).decipher(ctext)
        score = fitness.score(deciphered)
        # if the child was better, replace the parent with it
        # NOTE(review): this copy of the snippet is cut off here; the rest of
        # the inner loop body is not visible. As it stands `count` is never
        # changed, so the loop cannot terminate - restore the missing tail
        # before running.
#######################################
##breaker by practicalcryptography.com
#######################################
from ngram_score import ngram_score
from pycipher import Vigenere
import re
from itertools import permutations

# `open` in a `with` block replaces the Python 2-only `file()` builtin and
# guarantees the handle is closed.
with open('encoded.txt', 'r') as f:
    ctext = f.read().replace(" ", "")

qgram = ngram_score('english_quadgrams.txt')
trigram = ngram_score('english_trigrams.txt')
#ctext = 'kiqpbkxspshwehospzqhoinlgapp'
# keep only the letters A-Z, uppercased
ctext = re.sub(r'[^A-Z]', '', ctext.upper())


# keep a list of the N best things we have seen, discard anything else
class nbest(object):
    """Bounded container holding the ``N`` largest items added so far.

    Items are kept in descending order, so index 0 is the largest item.
    """

    def __init__(self, N=1000):
        self.store = []
        self.N = N

    def add(self, item):
        """Insert ``item``, then drop everything beyond the ``N`` largest."""
        self.store.append(item)
        self.store.sort(reverse=True)
        self.store = self.store[:self.N]

    def __getitem__(self, k):
        return self.store[k]

    def __len__(self):
        # NOTE(review): the body was cut off in this copy of the snippet; it
        # is restored to match the identical `nbest` class used by the Autokey
        # breaker snippets in this file - confirm.
        return len(self.store)
import ngram_score as ns
import caesar_cipher as cc

fitness = ns.ngram_score('data/english_quadgrams.txt')


def break_cipher(ciphertext, keys):
    """Try every key in ``keys`` on ``ciphertext`` and report the best one.

    For each key the text is deciphered with ``cc.caesar_decipher`` and
    scored with ``fitness.score``; one line is printed per key, followed by
    a summary line for the highest-scoring key.

    If ``keys`` is empty (or no candidate scores above ``-inf``) the summary
    reports ``None`` for the key and plaintext instead of failing on
    unassigned variables.
    """
    max_fitness_score = float('-inf')
    most_accurate_plaintext = None
    most_accurate_key = None

    for key in keys:
        plaintext = cc.caesar_decipher(ciphertext, key)
        fitness_score = fitness.score(plaintext)

        # Strict comparison: on a tie the earliest key wins.
        if fitness_score > max_fitness_score:
            max_fitness_score = fitness_score
            most_accurate_plaintext = plaintext
            most_accurate_key = key

        print(
            f'Key = {key}\tPlaintext = {plaintext}\tFitness = {fitness_score}')

    print(
        f'\nMost accurate key = {most_accurate_key}\tMost accurate plantext = {most_accurate_plaintext}\tFitness = {max_fitness_score}\n'
    )


def example1():
    """Break one ciphertext over keys 1-25 and another over keys 3 and 5."""
    break_cipher('YMJHFJXFWHNUMJWNXTSJTKYMJJFWQNJXYPSTBSFSIXNRUQJXYHNUMJWX',
                 range(1, 26))
    break_cipher('VHFUHW', [3, 5])


def example2():
    """Break a second sample ciphertext over keys 1-25."""
    ciphertext = "YMJRFLNHTKHTINSLNSYMJZSNAJWXNYD"
    break_cipher(ciphertext, range(1, 26))
from ngram_score import ngram_score
from pycipher import Autokey
import re
from itertools import permutations

qgram = ngram_score('quadgrams.txt')
trigram = ngram_score('trigrams.txt')

ctext = 'isjiqymdebvuzrvwhmvysibugzhyinmiyeiklcvioimbninyksmmnjmgalvimlhspjxmgfiraqlhjcpvolqmnyynhpdetoxemgnoxl'
# keep only the letters A-Z, uppercased
ctext = re.sub(r'[^A-Z]', '', ctext.upper())


# keep a list of the N best things we have seen, discard anything else
class nbest(object):
    """Bounded container holding the ``N`` largest items added so far.

    Items are kept in descending order, so index 0 is the largest item.
    """

    def __init__(self, N=1000):
        self.store = []
        self.N = N

    def add(self, item):
        """Insert ``item``, then drop everything beyond the ``N`` largest."""
        self.store.append(item)
        self.store.sort(reverse=True)
        self.store = self.store[:self.N]

    def __getitem__(self, k):
        return self.store[k]

    def __len__(self):
        return len(self.store)


#init
N = 100
from ngram_score import ngram_score
from pycipher import Autokey
import re
from itertools import permutations

qgram = ngram_score('quadgrams.txt')
trigram = ngram_score('trigrams.txt')

ctext = 'isjiqymdebvuzrvwhmvysibugzhyinmiyeiklcvioimbninyksmmnjmgalvimlhspjxmgfiraqlhjcpvolqmnyynhpdetoxemgnoxl'
# keep only the letters A-Z, uppercased
ctext = re.sub(r'[^A-Z]', '', ctext.upper())


# keep a list of the N best things we have seen, discard anything else
class nbest(object):
    """Bounded container holding the ``N`` largest items added so far.

    Items are kept in descending order, so index 0 is the largest item.
    """

    def __init__(self, N=1000):
        self.store = []
        self.N = N

    def add(self, item):
        """Insert ``item``, then drop everything beyond the ``N`` largest."""
        self.store.append(item)
        self.store.sort(reverse=True)
        self.store = self.store[:self.N]

    def __getitem__(self, k):
        return self.store[k]

    def __len__(self):
        return len(self.store)


#init
N = 100

# One fresh candidate store per key length 3..19.
for KLEN in range(3, 20):
    rec = nbest(N)
    # NOTE(review): this copy of the snippet is cut off here; the rest of the
    # loop body (the search that fills `rec`) is not visible.
import random
import ngram_score as ns

# This was programmed and run in an online compiler so runtimes may vary
# Sources used
# https://inventwithpython.com/hacking/chapter17.html
# http://practicalcryptography.com/cryptanalysis/text-characterisation/quadgrams/
# https://repository.cardiffmet.ac.uk/bitstream/handle/10369/8628/Brown%2C%20Ryan%20James.pdf?sequence=1&isAllowed=y

# collection of ngrams from the internet, uses one that best fits the size of
# the text.
fitness = ns.ngram_score('mixedEnglishgrams.txt')

cipher = "iyhqz ewqin azqej shayz niqbe aheum hnmnj jaqii yuexq ayqkn jbeuq iihed yzhni ifnun sayiz yudhe sqshu qesqa iluym qkque aqaqm oejjs hqzyu jdzqa diesh niznj jayzy uiqhq vayzq shsnj jejjz nshna hnmyt isnae sqfun dqzew qiead zevqi zhnjq shqze udqai jrmtq uishq ifnun siiqa suoij qqfni syyle iszhn bhmei squih nimnx hsead shqmr udquq uaqeu iisqe jshnj oihyy snaxs hqihe lsilu ymhni tyz"
message = cipher.upper()


def swap(key):
    """Build a variant of ``key`` with two randomly chosen positions exchanged.

    Two distinct positions ``x`` and ``y`` in 0..25 are drawn at random and
    the characters of ``key`` are walked to assemble the new string.
    """
    x = 0
    y = 0
    new = ""
    # redraw until the two positions differ
    while x == y:
        x = random.randint(0, 25)  # randomly swaps them from the alphabet
        y = random.randint(0, 25)
    for i in key:
        # chooses the index in which the swap occurs
        if key.index(i) == x:
            new += key[y]
        # NOTE(review): this copy of the snippet is cut off here. The original
        # continues with `elif key.index(i) == y:`; that branch, the handling
        # of all other characters and the return statement are not visible,
        # so as written this function returns None.
word_list.append("GEEGB")
word_list.append("GEB")

#Vigenere decode the using SOWPODS file
print("Vigenere decoding " + str(len(decoded_texts)) + " candidates with " + str(len(word_list)) + " key words...\n(Good luck!)")
vig_decode = []
counter = 0
for word in word_list:
    # progress as a percentage of key words processed so far
    print(str((counter / len(word_list)) * 100) + "%")
    # try this key word against every candidate text
    vig_decode.extend(tryDecode(word, item) for item in decoded_texts)
    counter = counter + 1

#Check the decoded results for any words matching the list of common words
print("Calculate quadgram scores for " + str(len(vig_decode)) + " strings...")
fitness = ns.ngram_score("c:/Users/Tommy/OneDrive/Documents/crypto1/quadgrams.txt")
# one fitness score per decoded string, in the same order as vig_decode
scores = [fitness.score(items) for items in vig_decode]

print("Sorting results...")
# estimatedBest yields indices into vig_decode (they are used as such below)
sorted_texts = estimatedBest(vig_decode, scores)

#use this for threading
#q1 = Queue()
#t1 = Process(target=findEnglishWords, args=(decoded_list_1, common_words_file, 0, q1)).start()
#word_hits_1 = q1.get()

#Output the results
for idx in sorted_texts:
    print(str(vig_decode[idx]))
import random
import string

import nltk
from six.moves import xrange

from ngram_score import ngram_score

fitness = ngram_score('english_bigrams.txt')


def sub_decipher(text, key, alphabet):
    """Decipher ``text`` with a substitution ``key`` over ``alphabet``.

    Each symbol of ``alphabet`` is mapped to the uppercase ASCII letter whose
    position equals that symbol's first position in ``key``.  A character of
    ``text`` is looked up by its code-point offset from ``alphabet[0]``, so
    the lookup is only meaningful when ``alphabet`` is a run of consecutive
    code points and ``text`` contains only symbols from it.

    Raises:
        ValueError: if a symbol of ``alphabet`` is missing from ``key``.
        IndexError: if a character's offset falls outside the table.
    """
    invkey = [string.ascii_uppercase[key.index(c)] for c in alphabet]
    # join() avoids building the result by repeated string concatenation
    return ''.join(invkey[ord(c) - ord(alphabet[0])] for c in text)


def next_iteration(text, key, alphabet):
    """Run one hill-climbing pass starting from ``key``.

    Repeatedly swaps two random positions of the current key and keeps the
    swapped key whenever it scores strictly better under ``fitness``.  The
    pass ends once ``iterations_count`` consecutive attempts bring no
    improvement.

    ``key`` must be a mutable sequence (elements are swapped in a copy); the
    caller's sequence itself is not modified.

    Returns:
        ``(score, key)`` - the best score reached and the key producing it.
    """
    alphabet_size = len(alphabet)
    iterations_count = 1000
    score = fitness.score(sub_decipher(text, key, alphabet))

    count = 0
    while count < iterations_count:
        a = random.randint(0, alphabet_size - 1)
        b = random.randint(0, alphabet_size - 1)
        child = key[:]
        child[a], child[b] = child[b], child[a]
        current_score = fitness.score(sub_decipher(text, child, alphabet))
        if current_score > score:
            score, key = current_score, child[:]
            # improvement found: restart the no-improvement counter
            count = 0
        count += 1

    return score, key