Esempio n. 1
0
def score_accuracy(encryption_key, decryption_key, cipher_text, original_text):
    ''' 
    Scores how accurate a decryption key was in decrypting a given cipher_text encrypted using a given encryption_key
    The score is given as a percent of correct letters in the encryption key that are mapped back to their original letters
    '''
    true_decryption_key = util.getDecryptionKey(encryption_key)
    original_text_noised = util.encryptCase(cipher_text, true_decryption_key)
    decrypted_text_noised = util.encryptCase(cipher_text, decryption_key)
    num_same_all = [1 for x,y,z in zip(original_text_noised, decrypted_text_noised, original_text) if x == z and x == y and x != " "]
    num_same_original = [1 for x,z in zip(original_text_noised, original_text) if x == z and x != " "]
    return sum(num_same_all)/float(sum(num_same_original))
Esempio n. 2
0
    def decrypt(self, text):
        # Count character frequencies of text
        cipher_char_frequency = {}
        for i in string.ascii_uppercase:
            cipher_char_frequency[i] = 0

        for i in text:
            if i in string.ascii_letters:
                cipher_char_frequency[i.upper()] = cipher_char_frequency.get(i.upper()) + 1

        cipher_letters_by_freq = "".join(sorted(cipher_char_frequency, key=cipher_char_frequency.get, reverse=True))
        decryption_key = "".join([y for (x,y) in sorted(zip(self.letters_by_freq, cipher_letters_by_freq))])
        return (util.encryptCase(text, decryption_key), decryption_key)
Esempio n. 3
0
  def decrypt(self, cipherText):
    upperCipher = cipherText.upper()
    def swapIndices(a, b, key):
      tempKey = list(key)
      tempKey[a], tempKey[b] = tempKey[b], tempKey[a]
      return "".join(tempKey)

    def sample(swaps):
      r = random.random()
      start = 0
      for swap in swaps:
        start += swap[0]
        if r <= start: return swap

    def gibbs(key):
      best_swap = (float('-inf'),"")
      last_n = []
      for i in xrange(200):
        for a in xrange(len(key)):
          swaps = []
          for b in xrange(len(key)):
            temp_key = swapIndices(a, b, key)
            temp_score = self.languagemodel.score(util.encrypt(upperCipher, temp_key))
            temp_swap = (temp_score, temp_key)
            if temp_swap[0] > best_swap[0]: best_swap = temp_swap
            swaps.append(temp_swap)

          # convert to probabilities
          maxSwap = max(swaps, key=operator.itemgetter(0))
          swaps = [(math.e ** (swap[0] - maxSwap[0]),swap[1],swap[0]) for swap in swaps]
          scoreSum = sum([swap[0] for swap in swaps])
          swaps = [(float(swap[0])/scoreSum,swap[1],swap[2]) for swap in swaps]

          # sample randomly
          selected = sample(swaps)
          key = selected[1]

        # keep last n swaps
        converge_n = 5
        last_n.append(best_swap)
        last_n = last_n[-converge_n:]

        # print best_swap[0], util.encrypt(upperCipher, best_swap[1])

        # check for convergence
        avgSwap = sum([swap[0] for swap in last_n]) / float(converge_n)
        if sum([abs(swap[0] - avgSwap) <= 1 for swap in last_n]) == converge_n:
          return best_swap

        decrypted = util.encrypt(upperCipher, best_swap[1])
        sys.stdout.write('.')
        sys.stdout.flush()

      return best_swap

    best_swap = (float('-inf'),"")
    num_best = 0
    for i in xrange(self.numIters):
      key = util.generateKey()
      swap = gibbs(key)
      if swap[0] == best_swap[0]: num_best += 1
      if swap[0] > best_swap[0]:
        best_swap = swap
        num_best = 1

      sys.stdout.write(str(float(i+1)/self.numIters * 100) + "%")
      sys.stdout.flush()

    translated = util.encryptCase(cipherText, best_swap[1])
    print "\n", num_best, "BEST: ", best_swap[0], translated
    return translated, best_swap[1], num_best
Esempio n. 4
0
def main(argv):
    learnfile = "ngrams.txt"
    testfile = "europarl-v7.es-en.en"
    verbose = False
    noise = 0.05
    numIterations = 0
    minLength = 10
    maxLength = 60

    def printHelpMessage():
        print 'decryptor.py [-i <n-gram file> -t <testfile> -n <noise level>]'
        print '-v verbose'
        print '-h help'
    try:
        opts, args = getopt.getopt(argv,"hvi:t:n:")
    except getopt.GetoptError:
        printHelpMessage()
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            printHelpMessage()
            sys.exit()
        elif opt in ("-i"): learnfile = arg
        elif opt in ("-t"): testfile = arg
        elif opt in ("-n"): noise = float(arg)
        elif opt in ("-v"): verbose = True

    print "Learning..."
    sys.stdout.flush()
    languagemodel = LanguageModel.LanguageModel(learnfile)
    original_text_file = open(testfile, "r")

    cipher_solver = solver.Solver(languagemodel)
    cipher_baseline = baseline.Baseline()
    solver_accuracy = []
    baseline_accuracy = []
    max_counts = []

    for original_text in original_text_file:
        if len(original_text) < minLength: continue
        if len(original_text) > maxLength: continue
        numIterations += 1
        encryption_key = util.generateKey()
        original_text_noised = util.add_noise(original_text, noise)
        cipher_text = util.encryptCase(original_text_noised, encryption_key)
        startTime = datetime.datetime.now()

        if verbose:
            print "============================"
            print "Iteration ", numIterations
            print "Length ", len(original_text)
            print "Start Time", startTime
            print "Original Text", original_text
            print "Original Text Noised", original_text_noised
            print "Key", encryption_key
            print "Cipher Text Noised", cipher_text
            
        
        baseline_text, baseline_decryption_key = cipher_baseline.decrypt(cipher_text)
        guess_text, guess_decryption_key, num_guesses = cipher_solver.decrypt(cipher_text)

        baseline_score = score_accuracy(encryption_key, baseline_decryption_key, cipher_text, original_text)
        baseline_accuracy.append(baseline_score)
        solver_score = score_accuracy(encryption_key, guess_decryption_key, cipher_text, original_text)
        solver_accuracy.append(solver_score)
        max_counts.append(num_guesses)

        if verbose:
            print "End Time", datetime.datetime.now()
            print "Duration", datetime.datetime.now() - startTime
            print "Length, Accuracy, Duration,", len(original_text), ',', solver_score, ',', datetime.datetime.now() - startTime
            print "Baseline Accuracy: ", baseline_score
            print "Average Accuracy of Baseline: ", sum(baseline_accuracy)/len(baseline_accuracy)
            print "Solver Accuracy: ", solver_score
            print "Average Accuracy of Solver: ", sum(solver_accuracy)/len(solver_accuracy)
            print "Reached same thing many times", max_counts

    print "Average Accuracy of Baseline: ", sum(baseline_accuracy)/len(baseline_accuracy)
    print "Average Accuracy of Solver: ", sum(solver_accuracy)/len(solver_accuracy)
    print "Over %d cipher texts" % len(solver_accuracy)