예제 #1
0
def getNgramScore(text, n=4):
    '''Return an n-gram fitness score for text.

    For n in 2..5 the matching scorer is created on first use from a file
    below <current working directory>/data, cached in a module-level
    global, and applied to the upper-cased text.  Any other n returns the
    mean of the four individual scores (orders 2, 3, 4 and 5).
    '''
    global biGramScore, triGramScore, quadGramScore, quinGramScore

    if n == 2:
        if biGramScore is None:
            biGramScore = ngram_score.ngram_score(
                os.getcwd() + '/data/english_bigrams.txt')
        return biGramScore.score(text.upper())

    if n == 3:
        if triGramScore is None:
            triGramScore = ngram_score.ngram_score(
                os.getcwd() + '/data/english_trigrams.txt')
        return triGramScore.score(text.upper())

    if n == 4:
        if quadGramScore is None:
            quadGramScore = ngram_score.ngram_score(
                os.getcwd() + '/data/english_quadgrams.txt')
        return quadGramScore.score(text.upper())

    if n == 5:
        if quinGramScore is None:
            quinGramScore = ngram_score.ngram_score(
                os.getcwd() + '/data/english_quingrams.txt')
        return quinGramScore.score(text.upper())

    # Any other order: average the four individual models.
    total = (getNgramScore(text, 2) + getNgramScore(text, 3) +
             getNgramScore(text, 4) + getNgramScore(text, 5))
    return total / 4
예제 #2
0
파일: utils.py 프로젝트: appsol/crypto
def getNgramScore(text, n=4):
    '''Score text with the n-gram model of order n (2, 3, 4 or 5).

    Each model is loaded lazily from <cwd>/data/... the first time it is
    requested and kept in a module-level global afterwards.  For any other
    value of n the four per-order scores are averaged.
    '''
    global biGramScore, triGramScore, quadGramScore, quinGramScore

    def load(relative_path):
        # Paths are resolved against the process' current working directory.
        return ngram_score.ngram_score(os.getcwd() + relative_path)

    if n == 2:
        if biGramScore is None:
            biGramScore = load('/data/english_bigrams.txt')
        scorer = biGramScore
    elif n == 3:
        if triGramScore is None:
            triGramScore = load('/data/english_trigrams.txt')
        scorer = triGramScore
    elif n == 4:
        if quadGramScore is None:
            quadGramScore = load('/data/english_quadgrams.txt')
        scorer = quadGramScore
    elif n == 5:
        if quinGramScore is None:
            quinGramScore = load('/data/english_quingrams.txt')
        scorer = quinGramScore
    else:
        parts = [getNgramScore(text, order) for order in (2, 3, 4, 5)]
        return (parts[0] + parts[1] + parts[2] + parts[3]) / 4
    return scorer.score(text.upper())
예제 #3
0
def AutokeyCracker(ctext, q1gram, t1gram):
    """Search for an Autokey key and print the best guess per key length.

    For every key length from 3 to 19:
      1. all 3-letter permutations are tried as the key prefix (padded with
         'A') and ranked with the scorer built from t1gram;
      2. the N best prefixes are extended one letter at a time, ranked with
         the scorer built from q1gram;
      3. the surviving keys are re-scored on the full decryption and the
         best one is printed.
    Nothing is returned.

    Args:
        ctext: Ciphertext; upper-cased, then everything outside A-Z removed.
        q1gram: Argument passed to ngram_score for the second-stage scorer.
        t1gram: Argument passed to ngram_score for the first-stage scorer.
    """
    qgram = ngram_score(q1gram)
    trigram = ngram_score(t1gram)
    ctext = re.sub(r'[^A-Z]', '', ctext.upper())

    class nbest(object):
        """Keep only the N largest items added so far, in descending order."""

        def __init__(self, N=1000):
            self.store = []
            self.N = N

        def add(self, item):
            self.store.append(item)
            self.store.sort(reverse=True)
            self.store = self.store[:self.N]

        def __getitem__(self, k):
            return self.store[k]

        def __len__(self):
            return len(self.store)

    N = 100
    for KLEN in range(3, 20):
        rec = nbest(N)

        # Stage 1: rank every 3-letter key prefix.
        for i in permutations('ABCDEFGHIJKLMNOPQRSTUVWXYZ', 3):
            key = ''.join(i) + 'A' * (KLEN - len(i))
            pt = Autokey(key).decipher(ctext)
            score = 0
            # Only the first 3 letters of each KLEN-sized block depend on
            # the prefix being tested.
            for j in range(0, len(ctext), KLEN):
                score += trigram.score(pt[j:j + 3])
            rec.add((score, ''.join(i), pt[:30]))

        # Stage 2: grow the best prefixes by one letter per round.
        next_rec = nbest(N)
        for i in range(0, KLEN - 3):
            # range() instead of the Python-2-only xrange(): the rest of the
            # function is already Python 3 compatible and xrange raised
            # NameError there.
            for k in range(N):
                for c in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
                    key = rec[k][1] + c
                    fullkey = key + 'A' * (KLEN - len(key))
                    pt = Autokey(fullkey).decipher(ctext)
                    score = 0
                    for j in range(0, len(ctext), KLEN):
                        score += qgram.score(pt[j:j + len(key)])
                    next_rec.add((score, key, pt[:30]))
            rec = next_rec
            next_rec = nbest(N)

        # Stage 3: pick the survivor with the best full-text score.
        bestkey = rec[0][1]
        pt = Autokey(bestkey).decipher(ctext)
        bestscore = qgram.score(pt)
        for i in range(N):
            pt = Autokey(rec[i][1]).decipher(ctext)
            score = qgram.score(pt)
            if score > bestscore:
                bestkey = rec[i][1]
                bestscore = score
        print('\033[1;34m[*]\033[0m Score = ' + str(bestscore) +
              ' ; Iteration = ' + str(KLEN) + ' ; Key = ' + bestkey +
              ' ; Out = ' + Autokey(bestkey).decipher(ctext))
예제 #4
0
def crack(ciphertext):
    """Recover key and plaintext of a repeating-key ciphertext.

    Key lengths 2..39 are ranked by the mean of three normalised
    hamming_distance values taken over consecutive block pairs.  For each of
    the five best-ranked lengths the ciphertext is split into one block per
    key position, every block is solved with xor.byte_xor_crack, and the
    resulting decryption is scored with the 'english_monograms.txt' model.

    Args:
        ciphertext: The text to break (sliced and indexed like a string).

    Returns:
        (key, plaintext) of the highest-scoring candidate.
    """
    fitness = ns.ngram_score('english_monograms.txt')

    # 1. Guess key length
    distances = []
    for size in range(2, 40):
        # Divide by float(size): with two ints Python 2 would truncate the
        # quotient before the old float(...) wrapper ever saw it.
        d1 = hamming_distance(ciphertext[0:size],
                              ciphertext[size:2 * size]) / float(size)
        d2 = hamming_distance(ciphertext[2 * size:3 * size],
                              ciphertext[3 * size:4 * size]) / float(size)
        d3 = hamming_distance(ciphertext[4 * size:5 * size],
                              ciphertext[5 * size:6 * size]) / float(size)
        distances.append((size, (d1 + d2 + d3) / 3))
    distances.sort(key=lambda tup: tup[1])

    # 2. Break ciphertext into blocks and solve each candidate length
    best_score = -float('inf')
    correct_key = ""
    correct_plaintext = ""
    for key_len, _ in distances[:5]:
        # Block j holds the characters at positions j, j + key_len, ...
        text_blocks = [ciphertext[j::key_len] for j in range(key_len)]
        # Solve each block separately; element [0] of the result is its key.
        key = "".join(xor.byte_xor_crack(block)[0] for block in text_blocks)
        plaintext = encrypt(ciphertext, key)
        score = fitness.score(plaintext)
        if score > best_score:
            best_score = score
            correct_plaintext = plaintext
            correct_key = key
    return (correct_key, correct_plaintext)
예제 #5
0
def AffineCracker(ctext, ngram):
    """Print the best Affine key found for ctext and the text it decrypts to.

    ngram is handed to ngram_score to build the scorer used by break_affine.
    Element [1] of break_affine's result is used as the (a, b) key pair.
    """
    scorer = ngram_score(ngram)
    best = break_affine(ctext, scorer)
    key = best[1]

    print('\033[1;32m[+]\033[0m Best candidate with key (a,b) = ' +
          str(key) + ':')
    a, b = key[0], key[1]
    print(Affine(a, b).decipher(ctext))
예제 #6
0
def test_substitution_cipher(text, max_iterations):
    """Hill-climb a simple substitution key with random restarts.

    Each of the max_iterations restarts shuffles the working key and then
    keeps swapping two random positions, accepting a swap whenever the
    'english_quadgrams.txt' score of the decryption improves.  A restart
    ends after 1000 consecutive non-improving swaps.

    Returns:
        (best score, best key as a list of letters, decryption with it).
    """
    scorer = ngram_score(
        'english_quadgrams.txt')  # quadgram statistics
    ctext = re.sub('[^A-Z]', '', text.upper())

    best_key = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    best_score = -99e9
    current_key = best_key[:]
    for _ in range(max_iterations):
        random.shuffle(current_key)
        current_score = scorer.score(SimpleSub(current_key).decipher(ctext))
        stale = 0
        while stale < 1000:
            a = random.randint(0, 25)
            b = random.randint(0, 25)
            candidate = current_key[:]
            # Exchange two letters of the key.
            candidate[a], candidate[b] = candidate[b], candidate[a]
            candidate_score = scorer.score(
                SimpleSub(candidate).decipher(ctext))
            if candidate_score > current_score:
                # Improvement: adopt the candidate and restart the counter
                # (this accepted swap itself counts as attempt number one).
                current_score = candidate_score
                current_key = candidate[:]
                stale = 1
            else:
                stale += 1
        # Remember the best restart seen so far.
        if current_score > best_score:
            best_score, best_key = current_score, current_key[:]
    return best_score, best_key, SimpleSub(best_key).decipher(ctext)
def autoDecryptTask(PipeIn, ciphertextContents):
    """Run 1000 hill-climbing restarts and send each new best key.

    Every restart shuffles the working key, then swaps random letter pairs,
    keeping a swap when the 'english_quadgrams.txt' score of
    substitute(ciphertextContents, key) improves.  A restart stops after
    1000 swaps in a row without improvement.  Whenever a restart beats the
    overall best score, the key (a list of letters) is passed to
    PipeIn.send.
    """
    fitness = ngram_score('english_quadgrams.txt')  # quadgram statistics

    def score_key(candidate):
        return fitness.score(substitute(ciphertextContents, candidate))

    maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    maxscore = -99e9
    parentkey = maxkey[:]
    for _ in range(1000):
        random.shuffle(parentkey)
        parentscore = score_key(parentkey)
        count = 0
        while count < 1000:
            first, second = random.randint(0, 25), random.randint(0, 25)
            child = parentkey[:]
            child[first], child[second] = child[second], child[first]
            score = score_key(child)
            if score > parentscore:
                # Better child replaces the parent; counting starts over.
                parentscore, parentkey = score, child[:]
                count = 0
            count += 1
        if parentscore > maxscore:
            maxscore, maxkey = parentscore, parentkey[:]
            PipeIn.send(maxkey)
예제 #8
0
def decryptUsingQuadgramLocalSearch(inputText, keyLength):
    inputText = "".join(inputText.lower().split())
    key = initializeRandomKey(keyLength)
    print "INITIAL KEY: ", key
    ngram = ngram_score("english_quadgrams.txt")
    fitness = ngram.score(decrypt(inputText, key))
    print "INITIAL FITNESS: ", fitness
    improvement = True
    indexOfKeyToModify = 0
    while improvement == True:
        bestFitness = float("-inf")
        bestKey = ""
        childrenKeys = computeChildren(key, indexOfKeyToModify)
        indexOfKeyToModify = (indexOfKeyToModify + 1) % keyLength
        for childKey in childrenKeys:
            childScore = ngram.score(decrypt(inputText, childKey))
            if childScore > bestFitness:
                bestFitness = childScore
                bestKey = childKey
        if bestFitness <= fitness:
            improvement = False
        else:
            fitness = bestFitness
            key = bestKey
            print fitness, key
    print key
예제 #9
0
def caesar_ngram(input_file, output_file):
    """Break a Caesar cipher via quadgram fitness and write the result.

    The text read from input_file is upper-cased and stripped of everything
    outside A-Z.  caesar_cipher is applied to that cleaned copy with each
    key 0..25 and every result is scored with the statistics loaded from
    'quadgrams.txt'.  The key with the highest score is printed, and
    caesar_cipher(original text, key) is written to output_file.

    Based on:
    http://practicalcryptography.com/cryptanalysis/text-characterisation/quadgrams/#a-python-implementation
    https://github.com/jameslyons/python_cryptanalysis/blob/master/break_caesar.py
    """
    original_text = input_file.read()

    fitness = ngram_score('quadgrams.txt')  # quadgram statistics

    # Scoring works on letters only.
    ctext = re.sub('[^A-Z]', '', original_text.upper())

    # (score, key) pairs; max() therefore selects the best-scoring key.
    scores = [(fitness.score(caesar_cipher(ctext, shift)), shift)
              for shift in range(26)]
    best_shift = max(scores)[1]

    print("The most possible key = " + str(best_shift))
    print("Check out your output file.")
    output_file.write(caesar_cipher(original_text, int(best_shift)))
예제 #10
0
def VigenereCracker(ctext, q1gram, t1gram):
    """Search for a Vigenere key and print the best guess per key length.

    For every key length from 3 to 19:
      1. all 3-letter permutations are tried as the key prefix (padded with
         'A') and ranked with the scorer built from t1gram;
      2. the N best prefixes are extended one letter at a time, ranked with
         the scorer built from q1gram;
      3. the surviving keys are re-scored on the full decryption and the
         best one is printed.
    Nothing is returned.  Relies on an nbest container defined elsewhere in
    the module.

    Args:
        ctext: Ciphertext; upper-cased, then everything outside A-Z removed.
        q1gram: Argument passed to ngram_score for the second-stage scorer.
        t1gram: Argument passed to ngram_score for the first-stage scorer.
    """
    qgram = ngram_score(q1gram)
    trigram = ngram_score(t1gram)
    ctext = re.sub(r'[^A-Z]', '', ctext.upper())
    N = 100
    for KLEN in range(3, 20):
        rec = nbest(N)

        # Stage 1: rank every 3-letter key prefix.
        for i in permutations('ABCDEFGHIJKLMNOPQRSTUVWXYZ', 3):
            key = ''.join(i) + 'A' * (KLEN - len(i))
            pt = Vigenere(key).decipher(ctext)
            score = 0
            # Only the first 3 letters of each KLEN-sized block depend on
            # the prefix being tested.
            for j in range(0, len(ctext), KLEN):
                score += trigram.score(pt[j:j + 3])
            rec.add((score, ''.join(i), pt[:30]))

        # Stage 2: grow the best prefixes by one letter per round.
        next_rec = nbest(N)
        for i in range(0, KLEN - 3):
            # range() instead of the Python-2-only xrange(): the rest of the
            # function is already Python 3 compatible and xrange raised
            # NameError there.
            for k in range(N):
                for c in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
                    key = rec[k][1] + c
                    fullkey = key + 'A' * (KLEN - len(key))
                    pt = Vigenere(fullkey).decipher(ctext)
                    score = 0
                    for j in range(0, len(ctext), KLEN):
                        score += qgram.score(pt[j:j + len(key)])
                    next_rec.add((score, key, pt[:30]))
            rec = next_rec
            next_rec = nbest(N)

        # Stage 3: pick the survivor with the best full-text score.
        bestkey = rec[0][1]
        pt = Vigenere(bestkey).decipher(ctext)
        bestscore = qgram.score(pt)
        for i in range(N):
            pt = Vigenere(rec[i][1]).decipher(ctext)
            score = qgram.score(pt)
            if score > bestscore:
                bestkey = rec[i][1]
                bestscore = score
        print('\033[1;34m[*]\033[0m Score = ' + str(bestscore) +
              ' ; Iteraction = ' + str(KLEN) + ' ; Key = ' + str(bestkey) +
              ' ; Out = ' + Vigenere(bestkey).decipher(ctext))
예제 #11
0
def byte_xor_detect(ciphertexts):
    """Pick the ciphertext whose byte_xor_crack result scores best.

    Every entry is run through byte_xor_crack and the returned plaintext is
    scored with the 'english_monograms.txt' model.

    Args:
        ciphertexts: Sequence of candidate ciphertexts.

    Returns:
        (index, key, plaintext) of the best-scoring entry.  All three are
        None when ciphertexts is empty or no entry scores above -inf.
    """
    fitness = ns.ngram_score('english_monograms.txt')
    max_score = -float('inf')
    # Bind the results up front: previously an empty input reached the
    # return statement with these names unassigned (UnboundLocalError).
    n = real_key = real_plaintext = None
    for i, ciphertext in enumerate(ciphertexts):
        key, plaintext = byte_xor_crack(ciphertext)
        score = fitness.score(plaintext)
        if score > max_score:
            max_score = score
            n = i
            real_plaintext = plaintext
            real_key = key
    return (n, real_key, real_plaintext)
예제 #12
0
def byte_xor_crack(ciphertext):
    """Try all 256 single-character keys and return the best-scoring one.

    For each value 0..255 the character is repeated to the ciphertext's
    length, combined with the ciphertext through xor(), and the result is
    scored with the 'english_monograms.txt' model.

    Returns:
        (key character, plaintext) for the highest score.
    """
    scorer = ns.ngram_score('english_monograms.txt')
    best = -float('inf')
    for byte in range(256):
        pad = chr(byte) * len(ciphertext)
        candidate = xor(ciphertext, pad)
        candidate_score = scorer.score(candidate)
        if candidate_score > best:
            best, key, plaintext = candidate_score, pad[0], candidate
    return (key, plaintext)
예제 #13
0
def autobreak(msg):
    """Greedily search a best key for each key length from 1 to 10.

    Starting from "A", every key position is tried with each letter A-Z and
    a letter is kept whenever fitness.score(crypt(msg, key, True)) improves.
    After a length has been processed the key is extended by one "A" and the
    search continues from there.

    The previous implementation could not run: it assigned to ``key[j]`` on
    an immutable str (TypeError), never updated the best score, and returned
    nothing.
    NOTE(review): the greedy strategy and the returned list are the apparent
    intent of the original code - confirm against the callers.

    Args:
        msg: The message passed as first argument to crypt().

    Returns:
        List of 10 keys; entry i is the best key found for length i + 1.
    """
    import ngram_score as ns
    fitness = ns.ngram_score()
    key = "A"
    liste_keys = []
    for _ in range(10):
        # Baseline for this length: the score of the key we start from.
        score_key = fitness.score(crypt(msg, key, True))
        for j in range(len(key)):
            for k in range(65, 91):
                # Strings are immutable, so build the candidate by slicing.
                candidate = key[:j] + chr(k) + key[j + 1:]
                score = fitness.score(crypt(msg, candidate, True))
                if score > score_key:
                    score_key = score
                    key = candidate
        liste_keys.append(key)
        key += "A"
    return liste_keys
예제 #14
0
def break_caesar(ctext):
    """Break a Caesar cipher by scoring all 26 keys; print and return the best.

    The ciphertext is upper-cased and stripped of everything outside A-Z,
    deciphered with each key 0..25, and every result is scored with the
    statistics loaded from './English_Frequency/quadgram.pickle'.  As the
    original author notes, short ciphertexts are not handled well.

    Returns:
        (score, key) tuple of the best-scoring key.
    """
    from ngram_score import ngram_score
    fitness = ngram_score('./English_Frequency/quadgram.pickle')  # quadgram statistics
    # Scoring works on upper-case letters only.
    ctext = re.sub('[^A-Z]', '', ctext.upper())
    # (score, key) pairs, so max() selects the best-scoring key.
    scores = [(fitness.score(Caesar(i).decipher(ctext)), i)
              for i in range(26)]
    # max_key used to be referenced without ever being assigned, so every
    # call raised NameError before anything was printed or returned.
    max_key = max(scores)
    print('best candidate with key (a,b) = {}:'.format(str(max_key[1])))
    print(Caesar(max_key[1]).decipher(ctext))
    return max_key
def word_decrypt_sub(word):
    """Hill-climb a substitution key for word forever, printing improvements.

    The input is upper-cased and reduced to A-Z.  Each outer iteration
    shuffles the working key and then swaps random letter pairs, keeping a
    swap when the 'english_quadgrams.txt' score of the decryption improves;
    it gives up after 1000 swaps in a row without improvement.  Whenever an
    iteration beats the overall best score, the score, key and plaintext
    are printed.  The function never returns; stop it externally.
    """
    fitness = ngram_score('english_quadgrams.txt')  # quadgram statistics
    ctext = re.sub('[^A-Z]', '', word.upper())

    maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    maxscore = -99e9  # sentinel, replaced by the first iteration's score
    parentkey = maxkey[:]

    i = 0
    while True:
        i += 1
        random.shuffle(parentkey)
        # SimpleSub deciphers the text with the key given as a letter list.
        parentscore = fitness.score(SimpleSub(parentkey).decipher(ctext))

        count = 0
        while count < 1000:
            a = random.randint(0, 25)
            b = random.randint(0, 25)
            child = parentkey[:]
            child[a], child[b] = child[b], child[a]
            score = fitness.score(SimpleSub(child).decipher(ctext))
            if score > parentscore:
                # Accept the better key and restart the patience counter.
                parentscore, parentkey = score, child[:]
                count = 0
            count += 1

        if not (parentscore > maxscore):
            continue
        # New overall best: record and report it.
        maxscore, maxkey = parentscore, parentkey[:]
        print('\nbest score so far:', maxscore, 'on iteration', i)
        ss = SimpleSub(maxkey)
        print('    best key: ' + ''.join(maxkey))
        print('    plaintext: ' + ss.decipher(ctext))
예제 #16
0
def resolver(ctext):
    """Recover a Vigenere key by per-position hill climbing.

    The ciphertext is upper-cased and reduced to A-Z, the key length is
    taken from ic_calculate, and the key starts as all 'A'.  Each pass tries
    every letter at every key position, keeping any change that raises the
    'quadgrams.txt' score of the decryption.  Passes repeat until the
    decryption is the same as after the previous pass.

    Returns:
        dict with 'key' (the key as a string) and 'text' (the decryption).
    """
    ctext = re.sub(r'[^A-Z]', '', ctext.upper())

    fitness = ngram_score('quadgrams.txt')  # quadgram statistics

    KLEN = ic_calculate(ctext)

    parentkey = list('A' * (KLEN))
    parentscore = fitness.score(Vigenere(parentkey).decipher(ctext))

    result = ""
    while True:
        for position in range(0, KLEN):
            for letter in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
                child = parentkey[:]
                child[position] = letter
                score = fitness.score(Vigenere(child).decipher(ctext))
                if score > parentscore:
                    parentscore, parentkey = score, child[:]

        plaintext = Vigenere(parentkey).decipher(ctext)
        # A pass that leaves the decryption unchanged ends the search.
        if plaintext == result:
            break
        result = plaintext

    return {'key': ''.join(parentkey), 'text': result}
예제 #17
0
def main():
    """Decode the substitution-cipher file and report the elapsed time.

    Loads the quadgram statistics from ../../tests/quadgrams.txt, reads the
    ciphertext at subCipher_dir through SubCipher.loadfile, prints what
    SubCipher.decodeAlgo returns, then prints the run time rounded to whole
    seconds.
    """
    started = time.perf_counter()

    # Where the quadgram statistics file is read from.
    quadgram_path = os.path.join("../../tests/", "quadgrams.txt")

    # ngram_score is imported from a directory added to sys.path here.
    sys.path.insert(1, "../ngramcode/")
    from ngram_score import ngram_score
    scorer = ngram_score(quadgram_path)

    solver = SubCipher()
    ciphertext = solver.loadfile(subCipher_dir)
    print(solver.decodeAlgo(ciphertext, scorer))

    elapsed = time.perf_counter() - started
    print("Program finished in {} second(s).".format(round(elapsed)))
예제 #18
0
def decryptQuadgram(input, longitudClave):
    """Local search for a key of length longitudClave; prints key and message.

    A random upper-case key is generated, then each round takes the keys
    computeChildren derives for one key position (positions are visited
    cyclically), scores their decryptions with the statistics file loaded
    below and moves to the best child.  The search stops as soon as the best
    child fails to beat the current fitness by more than 1.
    """
    key = "".join(chr(65 + randint(0, 25)) for _ in range(longitudClave))
    print("Clave aleatoria inicial: ", key)

    ngram = ngram_score(
        "/home/diego/PycharmProjects/Seguridad/ejercicio9-vigenere/spanish_monograms.txt"
    )
    fitness = ngram.score(decrypt(input, key))
    print("Fitness inicial: ", fitness)

    indexClave = 0
    while True:
        # Generate the variations of the key for the current position.
        childrenKeys = computeChildren(key, indexClave)
        indexClave = (indexClave + 1) % longitudClave

        bestFitness, bestKey = float("-inf"), ""
        for childKey in childrenKeys:
            childScore = ngram.score(decrypt(input, childKey))
            if childScore > bestFitness:
                bestFitness, bestKey = childScore, childKey

        # Require a gain of more than 1 to keep going.
        if bestFitness - 1 <= fitness:
            break
        fitness, key = bestFitness, bestKey
        print("Clave: {}  Fitness: {}".format(key, fitness))

    print("Clave definitiva: {}".format(key))
    print("Mensaje: {}".format(decrypt(input, key)))
예제 #19
0
def SubsCracker(ctext, ngram):
    """Hill-climb a substitution key forever, printing every new best.

    ngram is passed to ngram_score to build the scorer.  The ciphertext is
    upper-cased and reduced to A-Z.  Each outer iteration shuffles the
    working key and swaps random letter pairs, keeping swaps that raise the
    score and stopping after 1000 swaps in a row without improvement.  The
    function never returns; interrupt it to stop.
    """
    fitness = ngram_score(ngram)
    ctext = re.sub('[^A-Z]', '', ctext.upper())

    def evaluate(candidate_key):
        return fitness.score(SimpleSub(candidate_key).decipher(ctext))

    maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    maxscore = -99e9
    parentkey = maxkey[:]
    iteration = 0
    print(
        '\033[1;34m[*]\033[0m Cracking the cipher. This might take a while...')
    while True:
        iteration += 1
        random.shuffle(parentkey)
        parentscore = evaluate(parentkey)
        misses = 0
        while misses < 1000:
            a = random.randint(0, 25)
            b = random.randint(0, 25)
            child = parentkey[:]
            child[a], child[b] = child[b], child[a]
            score = evaluate(child)
            if score > parentscore:
                # The accepted swap counts as the first attempt of a new run.
                parentscore, parentkey = score, child[:]
                misses = 1
            else:
                misses += 1
        if parentscore > maxscore:
            maxscore, maxkey = parentscore, parentkey[:]
            print('\n\033[1;34m[*]\033[0m Best score so far: ' +
                  str(maxscore) + ' on iteration ' + str(iteration))
            ss = SimpleSub(maxkey)
            print('    \033[1;32m[+]\033[0m Best key: ' + ''.join(maxkey))
            print('    \033[1;32m[+]\033[0m Plaintext: ' + ss.decipher(ctext))
def main():
    """Decode the substitution-cipher file with a queue-backed SubCipher.

    Loads the quadgram statistics from ../../tests/quadgrams.txt, builds a
    SubCipher around a new multiprocessing.Queue, reads the ciphertext at
    subCipher_dir, prints what decodeAlgo returns, and finally prints the
    run time rounded to whole seconds.
    """
    t0 = time.perf_counter()

    # Where the quadgram statistics file is read from.
    quadgram_path = os.path.join("../../tests/", "quadgrams.txt")

    # ngram_score is imported from a directory added to sys.path here.
    sys.path.insert(1, "../ngramcode/")
    from ngram_score import ngram_score
    match = ngram_score(quadgram_path)

    # The SubCipher instance receives the queue at construction time.
    solver = SubCipher(multiprocessing.Queue())
    cipher = solver.loadfile(subCipher_dir)

    print(solver.decodeAlgo(cipher, match))

    t1 = time.perf_counter()
    message = "Program finished in {} second(s).".format(round(t1 - t0))
    print(message)
# -*- coding: utf-8 -*-
"""
Solution to substitution cipher by Pieter and Ole

Created on Wed Jul 20 19:14:20 2016

@author: ole
"""

import sys
import ngram_score as ns
import string

from random import randint

scorer = ns.ngram_score('english_trigrams.txt')

def compute_fitness(text):
    """Return the score the module-level scorer assigns to text."""
    fitness_value = scorer.score(text)
    return fitness_value
    
ALPHABET = list(string.ascii_uppercase)

def make_table(key):
    """Map each ALPHABET entry to the element of key at the same position."""
    return {letter: mapped for letter, mapped in zip(ALPHABET, key)}
    
def decipher(text,table):
    """Translate text through table, leaving unmapped characters unchanged."""
    return "".join(table.get(symbol, symbol) for symbol in text)
    
예제 #22
0
import sys
sys.path.append('/home/seb/tools/crypto/ngram_score')
sys.path.append('/home/seb/tools/crypto/pycipher')

from ngram_score import ngram_score
fitness = ngram_score('/home/seb/tools/crypto/practicalcryptography.com/quadgrams.txt') 

import morse_talk

#data = '-.-.-..-..--.--..-..---.-.--.--.'
data = '-.-. - ..-. .-- .--.'.replace(' ', '')
#data = '-.-' #.-..-..--.--..-..---.-.--.--.'
#data = 'ABCD'

'''
A B C
A BC
AB C
'''

'''
result =
A BCD
A B CD
A B C D
A BC D
AB CD
AB C D
ABC D
ABCD
'''
예제 #23
0

def decrypt(ctxt, key):
    """Shift every character of ctxt back by key positions within A-Z.

    Each character is taken as an offset from 'A', reduced by key and
    wrapped modulo 26; no character is filtered out beforehand.
    """
    return "".join(
        chr((ord(c) - ord('A') - key + 26) % 26 + ord('A')) for c in ctxt)


# Read the ciphertext (first line of the file only).  The with-block closes
# the file even if reading fails.
with open("caesar_ciphertext.txt", "r") as fi:
    ciphertext = fi.readline().strip()

ns = ngram_score("english_quadgrams.txt")

# Decrypt with every shift 0..25 and remember each candidate's score.
texts = dict()
for k in range(0, 26):
    plaintext = decrypt(ciphertext, k)
    scr = ns.score(plaintext)
    texts[plaintext] = scr
    # Reuse the score computed above instead of scoring the text twice.
    print(plaintext + " " + str(scr))

# The candidate with the highest score is reported as the result.
result = max(texts, key=texts.get)
print("Result: " + result)
import sys
import random
import ngram_score as ns
fitness = ns.ngram_score('english_quadgrams.txt')

englishAlphabet = [' ', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
frequencyAlphabet = ['E', 'T', 'A', 'O', 'I', 'N', 'S', 'R', 'H', 'L', 'D', 'C', 'U', 'M', 'F', 'P', 'G', 'W', 'Y', 'B', 'V', 'K', 'X', 'J', 'Q', 'Z']

# Returns the text with all characters in upper case
def getUpperCaseText(tokens):
    """Return tokens converted with its upper() method."""
    return tokens.upper()

# Generates the key as a list of tuples
def generateTuppleList(keyList, alphabetList):
    """Pair keyList with alphabetList position by position.

    Returns:
        A list of (key element, alphabet element) tuples, truncated to the
        shorter input.
    """
    # Materialise the pairs: on Python 3 zip() yields a one-shot iterator,
    # which a caller looping over the key once per character would exhaust
    # after the first character.  Python 2's zip() already returned a list.
    return list(zip(keyList, alphabetList))

# Encrypts a plaintext using the given key
def encryptText(key, plainText):
    """Substitute each character of the upper-cased plainText using key.

    key is iterated as pairs whose element [1] is the plaintext character
    and element [0] its replacement.  For every input character, the
    replacement of each matching pair is appended with list.extend (a
    multi-character replacement therefore adds one list item per
    character).  Characters with no matching pair are dropped.

    Returns:
        The ciphertext as a list of characters.
    """
    cipherText = []
    for char in getUpperCaseText(plainText):
        replacements = (pair[0] for pair in key if char == pair[1])
        for replacement in replacements:
            cipherText.extend(replacement)
    return cipherText

# Sorts characters from highest occurence to lowest and removes duplicate characters
def sortOccurence(text):
    sortedOccurList = []
예제 #25
0
import numpy as np
import re
import matplotlib.pyplot as plt
import ngram_score as ns
import itertools


frequency_alphabet = 'etaoinshrdlcumwfgypbvkjxqz'
plain_alphabet = 'abcdefghijklmnopqrstuvwxyz'
plain_alphabet_cap = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'


trigram_fitness = ns.ngram_score(ns.trigrams)
quadgram_fitness = ns.ngram_score(ns.quadgrams)


def scoreOnTrigrams(text, sample_length=24, times=10):
    """Average the trigram fitness of randomly placed windows of text.

    Draws `times` random start offsets, scores the upper-cased slice of
    `sample_length` characters at each offset with trigram_fitness, and
    returns the mean of those scores.
    """
    upper = text.upper()
    # Exclusive upper bound for the random start offsets.
    limit = len(text) - sample_length - 1
    total = 0
    for _ in range(times):
        start = np.random.randint(0, limit)
        window = upper[start:start + sample_length]
        total += trigram_fitness.score(window)
    return total / times


def scoreOnQuadgrams(text, sample_length=24, times=10):
    text_cap = text.upper()
    max_pos = len(text) - sample_length - 1
    acc = 0
    for i in range(times):
Algorithm:
1. Use a random key and set this as the parent key
Do this for 1000 iterations:
    2. Store the fitness score (score based on occurrences of Quadgrams) of the deciphered text using key
    3. Swap 2 characters in the parent key and set this as child. If fitness score calculated using the child is greater than parent then set the parent as child key
4. Print current best key and best plaintext if fitness score is higher than from the earlier iterations and move back to step 1

@author: anirudhravi
"""
from pycipher import SimpleSubstitution as SimpleSub
import time
import re
import random
from ngram_score import ngram_score

fitness = ngram_score("quadgrams.txt")  # Import number of times Quadgrams occur in "War and Peace" by Leo Tolstoy

ctext = "tpfccdlfdttepcaccplircdtdklpcfrp?qeiqlhpqlipqeodfgpwafopwprtiizxndkiqpkiikrirrifcapncdxkdciqcafmdvkfpcadf"
ctext = re.sub("[^A-Z]", "", ctext.upper())  # Remove spaces and make all characters Upper Case

maxkey = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
maxscore = -99e9
parentscore, parentkey = maxscore, maxkey[:]
print "Cryptanalysis of Monoalphabetic Cipher. Several Iterations are run to get the correct result."
print "Enter CTRL+C to exit the program"
# Keep going until the program is killed
start_time = time.time()
i = 0
while 1:
    i = i + 1
    random.shuffle(parentkey)
예제 #27
0
        self.store.sort(reverse=True)
        self.store = self.store[:self.N]   
    
    def __getitem__(self,k):
        """Return the entry (or slice) of the internal store at k."""
        entries = self.store
        return entries[k]

    def __len__(self):
        """Return how many entries the internal store currently holds."""
        entries = self.store
        return len(entries)


import re
# this is the second feynman cipher, no known decryption
ctext ='XUKEXWSLZJUAXUNKIGWFSOZRAWURORKXAOSLHROBXBTKCMUWDVPTFBLMKEFVWMUXTVTWUIDDJVZKBRMCWOIWYDXMLUFPVSHAGSVWUFWORCWUIDUJCNVTTBERTUNOJUZHVTWKORSVRZSVVFSQXOCMUWPYTRLGBMCYPOJCLRIYTVFCCMUWUFPOXCNMCIWMSKPXEDLYIQKDJWIWCJUMVRCJUMVRKXWURKPSEEIWZVXULEIOETOOFWKBIUXPXUGOWLFPWUSCH'
ctext = re.sub('[^A-Z]','',ctext.upper())

mono = ngram_score('monograms.txt')
bi = ngram_score('bigrams.txt')
quad = ngram_score('quadgrams.txt')

N = 20
rec = nbest(N)
for seq in product(range(26),repeat=3):
    if seq[0]%2 == 0 and seq[1]%2 == 0 and seq[2]%2 == 0:
        continue
    if seq[0]%13 == 0 and seq[1]%13 == 0 and seq[2]%13 == 0:
        continue        
    seq2 = (seq[0],seq[1],seq[2],1,1,1,1,1,1)
    txt = hill3decipher(ctext,seq2)
    score = 0
    for i in range(0,len(txt),3):
        score += mono.score(txt[i])
Algorithm:
1. Use a random key and set this as the parent key
Do this for 1000 iterations:
    2. Store the fitness score (score based on occurrences of Quadgrams) of the deciphered text using key
    3. Swap 2 characters in the parent key and set this as child. If fitness score calculated using the child is greater than parent then set the parent as child key
4. Print current best key and best plaintext if fitness score is higher than from the earlier iterations and move back to step 1

@author: anirudhravi
'''
from pycipher import SimpleSubstitution as SimpleSub
import time
import re
import random
from ngram_score import ngram_score
fitness = ngram_score(
    'quadgrams.txt'
)  # Import number of times Quadgrams occur in "War and Peace" by Leo Tolstoy

ctext = 'tpfccdlfdttepcaccplircdtdklpcfrp?qeiqlhpqlipqeodfgpwafopwprtiizxndkiqpkiikrirrifcapncdxkdciqcafmdvkfpcadf'
ctext = re.sub(
    '[^A-Z]', '',
    ctext.upper())  #Remove spaces and make all characters Upper Case

maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
maxscore = -99e9
parentscore, parentkey = maxscore, maxkey[:]
print "Cryptanalysis of Monoalphabetic Cipher. Several Iterations are run to get the correct result."
print "Enter CTRL+C to exit the program"
# Keep going until the program is killed
start_time = time.time()
i = 0

def decipher(str, key_list):
    """Decipher str with a substitution key given as a sequence of symbols.

    The symbol at position i of key_list maps to the i-th letter counted
    from 'A'; if a symbol occurs more than once, its last position wins.
    Characters of str that are not in key_list are dropped.
    """
    first = ord("A")
    key_dict = {symbol: chr(first + position)
                for position, symbol in enumerate(key_list)}
    return "".join(key_dict[c] for c in str if c in key_dict)


fit = ngram_score("quadgrams.txt")
c_text = open("cipher_text.txt", "r").read()
out_file = open("monoalphabetic_substitution_cipher_output.txt", "w")

global_best_score = -999999999
global_best_key = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

while True:
    local_best_key = global_best_key[:]
    random.shuffle(local_best_key)

    local_p_text = decipher(c_text, local_best_key)
    local_best_score = fit.score(local_p_text)
    loop_count = 0
    while loop_count < 1000:
        index1 = random.randint(0, 25)
예제 #30
0
import random
from ngram_score import ngram_score
import re

# load our quadgram model
with open('quadgrams.txt', 'r') as ngram_file:
    # One list entry per line of the file, line endings included.
    ngrams = list(ngram_file)
# NOTE(review): ngram_score is given the list of lines here rather than a
# file name - confirm the local ngram_score accepts that.
fitness = ngram_score(ngrams)

# helper function, converts an integer 0-25 into a character
def i2a(i):
    """Map an integer to an upper-case letter, wrapping modulo 26 (0 -> 'A')."""
    return chr(ord('A') + i % 26)

# decipher a piece of text using the substitution cipher and a certain key
def sub_decipher(text,key):
    """Decipher text with the substitution key, keeping non-letters as is.

    The key is inverted first: for each letter A-Z, the letter whose
    alphabet position equals that letter's index in key.  Alphabetic
    characters of text are upper-cased and looked up in the inverted key;
    every other character is copied unchanged.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    invkey = [i2a(key.index(letter)) for letter in alphabet]
    return ''.join(
        invkey[ord(c.upper()) - ord('A')] if c.isalpha() else c
        for c in text)

def break_simplesub(ctext,startkey=None):
	''' perform hill-climbing with a single start. This function may have to be called many times
		to break a substitution cipher. '''
	# make sure ciphertext has all spacing/punc removed and is uppercase
	ctext = re.sub('[^A-Z]','',ctext.upper())
	parentkey,parentscore = startkey or list('ABCDEFGHIJKLMNOPQRSTUVWXYZ'),-99e99
	if not startkey: random.shuffle(parentkey)
	parentscore = fitness.score(sub_decipher(ctext,parentkey))
	count = 0
	while count < 1000:
예제 #31
0
import os
os.listdir()

from pycipher import SimpleSubstitution as SimpleSub
import random
import re
from ngram_score import ngram_score
fitness = ngram_score(
    '/root/Documents/CTF_WECALL/quadgrams.txt')  # load our quadgram statistics

# ciphertext to attack (spaces are left in place)
ctext = 'UH BDP TFYXRDBH RZV HZQ STI WPTV BDXE YH GWXPIV X TY XYLWPEEPV NPWH APFF VZIP HZQW EZFQBXZI OPH XE YIGEGUVYFIDT BDXE FXBBFP SDTFFPIRP ATE IZB BZZ DTWV ATE XB'
#ctext = re.sub('[^A-Z]','',ctext.upper())
# best key/score seen so far, and the working parent used by the hill climb
maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
maxscore = -99e9
parentscore, parentkey = maxscore, maxkey[:]  # copy so shuffling the parent leaves maxkey intact
print("Substitution Cipher solver, you may have to wait several iterations")
print("for the correct result. Press ctrl+c to exit program.")
# keep going until we are killed by the user
i = 0
while 1:
    i = i + 1
    random.shuffle(parentkey)
    deciphered = SimpleSub(parentkey).decipher(ctext)
    parentscore = fitness.score(deciphered)
    count = 0
    while count < 1000:
        a = random.randint(0, 25)
        b = random.randint(0, 25)
        child = parentkey[:]
        # swap two characters in the child
예제 #32
0
from operator import itemgetter
from ngram_score import ngram_score

def decrypt(ctxt, key):
    """Undo a Caesar shift of ``key`` positions on ``ctxt``.

    Each character's offset from 'A' is reduced by ``key`` and wrapped
    modulo 26, so the result consists solely of the letters A-Z. The input
    is not validated: characters outside A-Z are folded into that range by
    the same arithmetic.
    """
    base = ord('A')
    return ''.join(chr((ord(ch) - base - key) % 26 + base) for ch in ctxt)

# Read the ciphertext: first line of the file only, surrounding whitespace stripped.
# The with-block closes the file even if reading fails.
with open("caesar_ciphertext.txt", "r") as fi:
    ciphertext = fi.readline().strip()

ns = ngram_score("english_quadgrams.txt")

# Try all 26 shifts, scoring each candidate plaintext once and reusing that
# score for both the lookup table and the progress line.
texts = {}
for k in range(0, 26):
    plaintext = decrypt(ciphertext, k)
    scr = ns.score(plaintext)
    texts[plaintext] = scr
    print(plaintext + " " + str(scr))

# The candidate with the highest score is reported as the answer.
result = max(texts, key=texts.get)
print("Result: " + result)
# usage: python break_fracmorse.py 'CIPHERTEXTMESSAGE'
# ideally you'll want 200 or so characters to reliably decrypt, shorter will often work but not as reliably.

import random
from ngram_score import ngram_score
import re
import sys
from pycipher import FracMorse

#ctext = FracMorse('PQRSTUVWXYZABCDEFGHIJKLMNO').encipher("He has not been returned to sea because of his affection for caregivers.The waitress pointed to the lunch menu, but the oldest living ex-major leaguer had no use for it")
fitness = ngram_score('fmorse_quadgrams.txt') # load our quadgram model

# helper function, converts an integer 0-25 into a character
def i2a(i):
    """Map an integer to an uppercase letter (0 -> 'A', 25 -> 'Z'), wrapping modulo 26."""
    offset = i % 26
    return chr(ord('A') + offset)

# decipher a piece of text using the substitution cipher and a certain key    
def sub_decipher(text,key):
    """Decipher ``text`` with the simple-substitution ``key``.

    ``key`` is the cipher alphabet (a list or string holding the 26 letters
    A-Z): a ciphertext letter is decoded to the plain alphabet letter found
    at that letter's position in ``key``.

    Letters are matched case-insensitively and always come out uppercase.
    Every other character (spaces, punctuation, digits, and letters outside
    A-Z such as accented ones) is copied through unchanged.

    Raises ValueError if ``key`` is missing one of the letters A-Z.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # ciphertext letter -> plaintext letter; the modulo keeps an over-long key in range
    plain_for = {c: alphabet[key.index(c) % 26] for c in alphabet}
    # Looking up the uppercased character (instead of indexing by code point)
    # lets anything that is not A-Z fall through untouched rather than crash.
    return ''.join(plain_for.get(ch.upper(), ch) for ch in text)

# This is just the simple substitution cipher cracking code; it works perfectly for fractionated Morse as
# long as you use fractionated Morse statistics instead of English statistics.
def break_simplesub(ctext,startkey=None):
    ''' perform hill-climbing with a single start. This function may have to be called many times
        to break a substitution cipher. '''
    # make sure ciphertext has all spacing/punc removed and is uppercase
    ctext = re.sub('[^A-Z]','',ctext.upper())
예제 #34
0
import re
import sys
from ngram_score import ngram_score
fitness = ngram_score('quadgrams.txt')  # load our quadgram statistics
from pycipher import Caesar


def break_caesar(ctext):
    """Brute-force a Caesar cipher and return the best ``(score, key)`` pair.

    The text is uppercased and stripped of everything except A-Z, then
    deciphered with each of the 26 possible keys. Every result is scored
    with the module-level ``fitness`` model, and the tuple with the highest
    score is returned.
    """
    letters_only = re.sub('[^A-Z]', '', ctext.upper())
    return max(
        (fitness.score(Caesar(shift).decipher(letters_only)), shift)
        for shift in range(26)
    )


# ciphertext
ctext = ""
if len(sys.argv) >= 2:
    # first command-line argument is the path of the ciphertext file
    with open(sys.argv[1], "r") as f:
        ctext = f.read()
    print("Cipher text: " + ctext)
else:
    print("Usage: " + sys.argv[0] + " filename")
    # Nothing to break without an input file: stop here instead of running
    # the solver on an empty string and printing a meaningless "best" key.
    sys.exit(1)
max_key = break_caesar(ctext)  # (score, key) of the best candidate

print('best candidate with key (a,b) = ' + str(max_key[1]) + ':')
plaintext = Caesar(max_key[1]).decipher(ctext)
print(plaintext)
예제 #35
0
#This program decrypts a message encrypted with the Vigenère cipher (without knowing the key):
#   To decrypt a message, the program tests the decryption with all possible keys. For example, if the key has a maximum
#   length of 5, it tries the keys A ..... Z, then AA ........ ZZ, and so on up to AAAAA ........ ZZZZZ,
#   and then selects the decrypted sentence that looks most like English.
#   For this, the program uses:
#       -An English dictionary containing English quadgrams, and a Python program that scores a sentence according
#        to its similarity to English: https://github.com/jameslyons/python_cryptanalysis/blob/master/ngram_score.py
#   The key is set to have a maximum size of 10 characters; this can be changed by modifying the 'for' loop at line 92
#   See example file 'vignere_autobreak.txt'

import ngram_score as ns
import numpy as np

fit=ns.ngram_score('english_quadgrams.txt') 

def grid():
        """
        prints Vignère table
        """
        print("    ",end="")
        for i in range(65,91):
                print("%c"%i,end=" ")
        print("")
        print("    ",end="")
        for i in range(65,91):
                print("_",end=" ")
        print("")
        for i in range(65,91):
                print("%c |"%i,end=" ")
                j=i
                while (j<=90):
예제 #36
0
from pycipher import SimpleSubstitution as SimpleSub
import random
import re
from ngram_score import ngram_score
fitness = ngram_score('spanish_quadgrams.txt') # load our quadgram statistics

ctext='pjbbfcklerfebjppjjlboumcuppelqpfezbjruoqlerdjbcuddbukulfjojprfebjbjzfrtmloupraublxpepkurtppdbjcbelfrfebkj'
# uppercase the ciphertext and drop everything that is not A-Z
ctext = re.sub('[^A-Z]','',ctext.upper())

# best key/score seen so far, and the working parent used by the hill climb
maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
maxscore = -99e9
parentscore,parentkey = maxscore,maxkey[:]  # copy so shuffling the parent leaves maxkey intact
# print() with a single parenthesised string behaves the same on Python 2 and 3
print("Substitution Cipher solver, you may have to wait several iterations")
print("for the correct result. Press ctrl+c to exit program.")
# keep going until we are killed by the user
i = 0
while 1:
    i = i+1
    random.shuffle(parentkey)
    deciphered = SimpleSub(parentkey).decipher(ctext)
    parentscore = fitness.score(deciphered)
    count = 0
    while count < 1000:
        a = random.randint(0,25)
        b = random.randint(0,25)
        child = parentkey[:]
        # swap two characters in the child
        child[a],child[b] = child[b],child[a]
        deciphered = SimpleSub(child).decipher(ctext)
        score = fitness.score(deciphered)
        # if the child was better, replace the parent with it
예제 #37
0
#######################################
##breaker by practicalcryptography.com
#######################################

from ngram_score import ngram_score
from pycipher import Vigenere
import re
from itertools import permutations

# open() replaces the Python-2-only file() builtin; the with-block closes the handle.
with open('encoded.txt','r') as f:
    ctext = f.read().replace(" ","")
qgram = ngram_score('english_quadgrams.txt')
trigram = ngram_score('english_trigrams.txt')
#ctext = 'kiqpbkxspshwehospzqhoinlgapp'
# uppercase the ciphertext and drop everything that is not A-Z
ctext = re.sub(r'[^A-Z]','',ctext.upper())

# keep a list of the N best things we have seen, discard anything else
class nbest(object):
    def __init__(self,N=1000):
        self.store = []
        self.N = N
        
    def add(self,item):
        self.store.append(item)
        self.store.sort(reverse=True)
        self.store = self.store[:self.N]
    
    def __getitem__(self,k):
        return self.store[k]

    def __len__(self):
import ngram_score as ns
import caesar_cipher as cc

fitness = ns.ngram_score('data/english_quadgrams.txt')


def break_cipher(ciphertext, keys):
    """Try each candidate key on ``ciphertext`` and print the best-scoring result.

    Args:
        ciphertext: text passed to ``cc.caesar_decipher``.
        keys: iterable of candidate keys; may be empty.

    For every key, the deciphered text is scored with the module-level
    ``fitness`` model and a line with the key, plaintext and score is
    printed. A summary line for the highest-scoring key follows. When
    several keys tie, the first one encountered is kept. If ``keys`` is
    empty, the summary reports ``None`` for key and plaintext.

    Returns:
        None; all results are written to stdout.
    """
    max_fitness_score = float('-inf')
    # Defaults keep the summary printable when no key ever beats -inf
    # (for example when ``keys`` is empty).
    most_accurate_plaintext = None
    most_accurate_key = None
    for key in keys:
        plaintext = cc.caesar_decipher(ciphertext, key)
        fitness_score = fitness.score(plaintext)
        if fitness_score > max_fitness_score:
            max_fitness_score = fitness_score
            most_accurate_plaintext = plaintext
            most_accurate_key = key
        print(
            f'Key = {key}\tPlaintext = {plaintext}\tFitness = {fitness_score}')
    print(
        f'\nMost accurate key = {most_accurate_key}\tMost accurate plantext = {most_accurate_plaintext}\tFitness = {max_fitness_score}\n'
    )


def example1():
    """Demo: attack one ciphertext with every key 1-25, then another with keys 3 and 5 only."""
    full_sweep = (
        'YMJHFJXFWHNUMJWNXTSJTKYMJJFWQNJXYPSTBSFSIXNRUQJXYHNUMJWX',
        range(1, 26),
    )
    short_list = ('VHFUHW', [3, 5])
    for text, candidate_keys in (full_sweep, short_list):
        break_cipher(text, candidate_keys)


def example2():
    """Demo: attack a single ciphertext with every key from 1 to 25."""
    break_cipher("YMJRFLNHTKHTINSLNSYMJZSNAJWXNYD", range(1, 26))
from ngram_score import ngram_score
from pycipher import Autokey
import re
from itertools import permutations

qgram = ngram_score('quadgrams.txt')
trigram = ngram_score('trigrams.txt')
ctext = 'isjiqymdebvuzrvwhmvysibugzhyinmiyeiklcvioimbninyksmmnjmgalvimlhspjxmgfiraqlhjcpvolqmnyynhpdetoxemgnoxl'
ctext = re.sub(r'[^A-Z]', '', ctext.upper())


# keep a list of the N best things we have seen, discard anything else
class nbest(object):
    """Bounded collection that keeps only the ``N`` largest items added to it.

    Items are held in ``store`` in descending order, so index 0 is always
    the current best. Items must be mutually comparable.
    """

    def __init__(self, N=1000):
        self.N = N
        self.store = []

    def __len__(self):
        """Number of items currently retained (never more than ``N``)."""
        return len(self.store)

    def __getitem__(self, k):
        """Return the k-th best item (0 is the largest)."""
        return self.store[k]

    def add(self, item):
        """Insert ``item``, re-rank, and discard anything beyond the first ``N``."""
        ranked = self.store
        ranked.append(item)
        ranked.sort(reverse=True)
        self.store = ranked[:self.N]


#init
N = 100
예제 #40
0
from ngram_score import ngram_score
from pycipher import Autokey
import re
from itertools import permutations

qgram = ngram_score('quadgrams.txt')
trigram = ngram_score('trigrams.txt')
ctext = 'isjiqymdebvuzrvwhmvysibugzhyinmiyeiklcvioimbninyksmmnjmgalvimlhspjxmgfiraqlhjcpvolqmnyynhpdetoxemgnoxl'
ctext = re.sub(r'[^A-Z]','',ctext.upper())

# keep a list of the N best things we have seen, discard anything else
class nbest(object):
    """Keep the best ``N`` items seen so far and forget the rest.

    ``store`` is kept sorted from largest to smallest, so ``self[0]`` is the
    best item added so far. Items must support ordering comparisons.
    """

    def __init__(self,N=1000):
        self.N = N        # maximum number of items retained
        self.store = []   # retained items, largest first

    def __len__(self):
        """Return how many items are currently retained."""
        return len(self.store)

    def __getitem__(self,k):
        """Return the item at rank ``k`` (rank 0 is the largest)."""
        return self.store[k]

    def add(self,item):
        """Add ``item``, then trim the sorted store back down to ``N`` entries."""
        self.store += [item]
        self.store.sort(reverse=True)
        self.store = self.store[0:self.N]

#init
N=100
for KLEN in range(3,20):
    rec = nbest(N)
예제 #41
0
import random
import ngram_score as ns
#This was programmed and run in an online compiler, so runtimes may vary

#Sources used

#https://inventwithpython.com/hacking/chapter17.html
#http://practicalcryptography.com/cryptanalysis/text-characterisation/quadgrams/
#https://repository.cardiffmet.ac.uk/bitstream/handle/10369/8628/Brown%2C%20Ryan%20James.pdf?sequence=1&isAllowed=y

fitness = ns.ngram_score('mixedEnglishgrams.txt')
#collection of ngrams from the internet, uses one that best fits the size of the text.
cipher = "iyhqz ewqin azqej shayz niqbe aheum hnmnj jaqii yuexq ayqkn jbeuq iihed yzhni ifnun sayiz yudhe sqshu qesqa iluym qkque aqaqm oejjs hqzyu jdzqa diesh niznj jayzy uiqhq vayzq shsnj jejjz nshna hnmyt isnae sqfun dqzew qiead zevqi zhnjq shqze udqai jrmtq uishq ifnun siiqa suoij qqfni syyle iszhn bhmei squih nimnx hsead shqmr udquq uaqeu iisqe jshnj oihyy snaxs hqihe lsilu ymhni tyz"
message = cipher.upper()


def swap(key):

    x = 0
    y = 0
    new = ""

    while (x == y):

        x = random.randint(0, 25)  #randomly swaps them from the alphabet
        y = random.randint(0, 25)

    for i in key:  #chooses the index in which the swap occurs
        if key.index(i) == x:
            new += key[y]
        elif key.index(i) == y:
예제 #42
0
	word_list.append("GEEGB")
	word_list.append("GEB")
	
	#Vigenere decode the using SOWPODS file
	print("Vigenere decoding " + str(len(decoded_texts)) + " candidates with " + str(len(word_list)) + " key words...\n(Good luck!)")
	vig_decode = []
	counter = 0
	for word in word_list:
		print(str((counter / len(word_list)) * 100) + "%")
		for item in decoded_texts:
			vig_decode.append(tryDecode(word, item))
		counter = counter + 1
	
	#Check the decoded results for any words matching the list of common words
	print("Calculate quadgram scores for " + str(len(vig_decode)) + " strings...")
	fitness = ns.ngram_score("c:/Users/Tommy/OneDrive/Documents/crypto1/quadgrams.txt")
	scores = []
	for items in vig_decode:
		scores.append(fitness.score(items))
	
	print("Sorting results...")
	sorted_texts = estimatedBest(vig_decode, scores)
	
	#use this for threading
	#q1 = Queue()
	#t1 = Process(target=findEnglishWords, args=(decoded_list_1, common_words_file, 0, q1)).start()
	#word_hits_1 = q1.get()
	
	#Output the results
	for i in range(0, len(sorted_texts)):
		print(str(vig_decode[sorted_texts[i]]))
예제 #43
0
import random
import string
import nltk
from six.moves import xrange
from ngram_score import ngram_score

fitness = ngram_score('english_bigrams.txt')

def sub_decipher(text, key, alphabet):
    """Decipher ``text`` with substitution ``key`` over the given ``alphabet``.

    For each symbol of ``alphabet``, its position in ``key`` selects the
    uppercase ASCII letter it decodes to. Symbols of ``text`` are looked up
    by their code-point offset from ``alphabet[0]``, so the lookup relies on
    ``alphabet`` being a run of consecutive code points.
    """
    decode = [string.ascii_uppercase[key.index(sym)] for sym in alphabet]
    return ''.join(decode[ord(sym) - ord(alphabet[0])] for sym in text)

def next_iteration(text, key, alphabet):
    """Hill-climb from ``key`` by random pair swaps and return ``(score, key)``.

    Each step copies the current key, swaps two randomly chosen positions
    and scores the resulting decipherment with the module-level ``fitness``
    model. A strictly better candidate replaces the current key and restarts
    the try counter. The search stops once the counter reaches 1000.
    """
    max_tries = 1000
    last_index = len(alphabet) - 1
    best_score = fitness.score(sub_decipher(text, key, alphabet))
    tries = 0
    while tries < max_tries:
        i = random.randint(0, last_index)
        j = random.randint(0, last_index)
        candidate = key[:]
        candidate[i], candidate[j] = candidate[j], candidate[i]
        candidate_score = fitness.score(sub_decipher(text, candidate, alphabet))
        if candidate_score > best_score:
            best_score, key = candidate_score, candidate[:]
            # counter is reset and this try is then counted, leaving it at 1
            tries = 1
        else:
            tries += 1
    return best_score, key