def plotFreq(ciph):
    """Render a PNG bar chart comparing English letter frequencies with ``ciph``.

    The text is lower-cased and reduced to the characters in ``ALPH`` before
    it is analysed. Red bars show ``FreqAnalysis.englishProbabilities``; blue
    bars show the value ``FreqAnalysis.getFrequencies`` reports for each
    letter divided by the filtered text length. When nothing survives the
    filtering, only the English bars are drawn.

    Returns:
        A ``Response`` carrying the PNG bytes with mimetype ``image/png``.
    """
    ciph = Format.keepOnly(ciph.lower(), ALPH)
    barwidth = 0.3
    half = barwidth / 2
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        lettcounts = [FreqAnalysis.englishProbabilities.get(x, 0) for x in ALPH]
        ciphprobs = FreqAnalysis.getFrequencies(ciph)

        # English bars sit half a bar-width left of each tick.
        ax.bar([x - half for x in range(26)], lettcounts,
               width=barwidth, label="English", color="r")

        # An empty text has no frequencies to normalise (division by zero),
        # so the cipher series is simply left out.
        if ciph:
            ciphcounts = [ciphprobs.get(x, 0) / len(ciph) for x in ALPH]
            ax.bar([x + half for x in range(26)], ciphcounts,
                   width=barwidth, label="Cipher Text", color="b")

        ax.get_yaxis().set_visible(False)
        ax.set_xticks(range(26))
        ax.set_xticklabels(ALPH.upper())
        ax.legend()

        output = io.BytesIO()
        FigureCanvas(fig).print_png(output)
    finally:
        # Figures created through pyplot stay registered until closed;
        # without this every call would leak one figure.
        plt.close(fig)
    return Response(output.getvalue(), mimetype="image/png")
def addSpaces():
    """Re-space the posted plaintext and report a word-detection score.

    Reads ``plain`` from the JSON body of a POST request, removes the
    characters in ``SPACE``, re-inserts spacing with ``SpaceAdd.addLongest``
    and returns a JSON string holding the spaced text and a score message.
    Any other request method yields the string ``"error"``.
    """
    if request.method != "POST":
        return "error"

    unspaced = Format.remove(request.json["plain"], SPACE)
    spaced = SpaceAdd.addLongest(unspaced)
    score = DetectEnglish.detectWord(spaced) * 100
    payload = {"plain": spaced, "score": f"{score}% certainty"}
    return json.dumps(payload)
def decrypt(ciph):
    """Use a hill-climbing algorithm to decipher a substituted alphabet.

    The cipher letters are first ordered by frequency and paired with the
    letters of ``letterProbs`` (presumably ordered from most to least common
    in English; confirm against its definition). Two positions of the best
    key found so far are then swapped at random; a swap is kept whenever
    ``DetectEnglish.detect`` scores the resulting text higher. The search
    stops after 1000 consecutive attempts without improvement.

    Args:
        ciph: Cipher text; it is lower-cased and reduced to ``ALPH``.

    Returns:
        A ``(plaintext, mapping)`` tuple, where ``mapping`` maps cipher
        letters to plain letters. For empty input the text is returned
        unchanged together with a mapping of every ``ALPH`` letter to ``""``.
    """
    ciph = Format.keepOnly(ciph.lower(), ALPH)
    if not ciph:
        return ciph, {x: "" for x in ALPH}

    key = [
        x[0] for x in FreqAnalysis.getFrequencies(ciph).most_common()
        if x[0] in ALPH
    ]
    # Start from the frequency-ordered key so there is always a valid best
    # key, even if no candidate ever scores above zero.
    bestKey = list(key)
    bestScore = 0

    # Swapping needs at least two positions; index 0 is never swapped
    # (as in the original), hence randint starts at 1.
    if len(key) > 1:
        i = 0
        while i < 1000:
            result = sub(ciph, dict(zip(key, letterProbs)))
            score = DetectEnglish.detect(result)
            if score > bestScore:
                bestScore = score
                bestKey = list(key)
                i = 0
            x = random.randint(1, len(bestKey) - 1)
            y = random.randint(1, len(bestKey) - 1)
            key = list(bestKey)
            key[x], key[y] = key[y], key[x]
            i += 1

    # Build the answer from the best key, not from the last (unscored)
    # mutation left in ``key``.
    bestMap = dict(zip(bestKey, letterProbs))
    return sub(ciph, bestMap), bestMap
def addForwards(text):
    """Insert spacing into text by greedily matching words from the front.

    At each position the longest prefix (up to ``DetectEnglish.getLongest()``
    characters) found in the word list is emitted; when no prefix matches,
    a single character is emitted instead.
    """
    with open("static/txt/wordlist.txt", encoding="utf-8") as wordFile:
        known = set(wordFile.read().split("\n"))

    remaining = Format.keepOnly(text, ALPH)
    pieces = []
    longest = DetectEnglish.getLongest()
    size = longest

    while True:
        candidate = remaining[:size]
        if candidate in known:
            pieces.append(candidate)
            remaining = remaining[size:]
            size = longest
        else:
            size -= 1

        if not remaining:
            break
        if size == 0:
            # No word starts here: emit one character and start over.
            pieces.append(remaining[0])
            remaining = remaining[1:]
            size = longest

    return " ".join(pieces)
def subInputs():
    """Apply one manual letter substitution posted as JSON.

    Reads ``name`` (its first character is the cipher letter being changed),
    ``val`` (the replacement, ``"_"`` when blank), ``ciph`` and ``plain``
    from the request body. Returns a JSON string with the updated plaintext
    and a score message, or ``"error"`` for non-POST requests.
    """
    if request.method != "POST":
        return "error"

    target = request.json["name"][0]
    replacement = request.json["val"].lower() or "_"
    cipher = Format.keepOnly(request.json["ciph"].lower(), ALPH)
    plain = Format.remove(request.json["plain"], SPACE).lower()

    if plain == "":
        # No plaintext yet: show only the newly substituted letter.
        updated = "".join(
            [replacement if letter in target else "_" for letter in cipher]
        )
    else:
        chars = list(plain)
        for pos, letter in enumerate(cipher):
            if letter == target:
                chars[pos] = replacement
        updated = "".join(chars)

    score = DetectEnglish.detectWord(SpaceAdd.add(updated)) * 100
    return json.dumps({"plain": updated, "score": f"{score}% certainty"})
def indexOfCoincidence(text):
    """Calculate the Index of Coincidence of a piece of text.

    The text is lower-cased and reduced to ``ALPH`` first. The result is the
    sum over all letters of ``n * (n - 1) / (N * (N - 1))``, where ``n`` is
    the letter's count and ``N`` the filtered text length.

    Returns:
        The index of coincidence, or 0 when fewer than two letters remain
        after filtering (the formula is undefined there).
    """
    text = Format.keepOnly(text.lower(), ALPH)
    total = len(text)
    # Guard on the *filtered* length: input such as "a!" has two characters
    # but only one letter, which would otherwise divide by zero.
    if total < 2:
        return 0
    pairs = total * (total - 1)
    count = FreqAnalysis.getFrequencies(text).most_common()
    return sum((x[1] * (x[1] - 1)) / pairs for x in count)
def chiSquared(text):
    """Calculate the Chi-Squared statistic of a piece of text.

    The text is lower-cased and reduced to ``ALPH``; each letter's observed
    count is then compared with ``letterProbs[letter] * len(text)``.

    Returns:
        The chi-squared sum over the letters of ``letterProbs``, or 0 when
        the text is empty before or after filtering.
    """
    if not text:
        return 0
    text = Format.keepOnly(text.lower(), ALPH)
    # Text with no letters (e.g. "123") would make every expected count
    # zero and the division below fail.
    if not text:
        return 0

    size = len(text)
    total = 0
    for letter, prob in letterProbs.items():
        expected = prob * size
        total += ((text.count(letter) - expected) ** 2) / expected
    return total
def _decodeInterleaved(subs):
    """Decipher each (substring, key) pair and weave the columns back together."""
    columns = [
        Substitution.sub(column, dict(zip(key, letterProbs)))
        for column, key in subs
    ]
    return "".join(
        "".join(chars) for chars in zip_longest(*columns, fillvalue="")
    )


def decryptWithSubstitution(ciph, period=7):
    """Decrypt a general polyalphabetic substitution cipher using multiple
    simultaneous hill-climbing algorithms.

    The text is split into ``period`` interleaved substrings, each with its
    own substitution key seeded from letter frequencies. A random swap is
    then tried in one randomly chosen key at a time and kept only when
    ``DetectEnglish.detect`` scores the recombined text higher. The search
    ends after 10000 consecutive attempts without improvement.

    Args:
        ciph: Cipher text; it is lower-cased and reduced to ``ALPH``.
        period: Number of cipher alphabets (defaults to the previously
            hard-coded value of 7).

    Returns:
        The best plaintext found.

    Raises:
        ValueError: If ``period`` is smaller than 1.
    """
    if period < 1:
        raise ValueError("period must be a positive integer")

    ciph = Format.keepOnly(ciph.lower(), ALPH)
    length = len(ciph)

    subs = []
    for offset in range(period):
        substring = ciph[offset::period]
        key = [
            y[0] for y in FreqAnalysis.getFrequencies(substring).most_common()
            if y[0] in ALPH
        ]
        # Pad with the letters that never occur so every key is complete.
        for char in letterProbs:
            if char not in key:
                key.append(char)
        subs.append((substring, key))

    bestScore = DetectEnglish.detect(_decodeInterleaved(subs), length=length)
    i = 0
    while i < 10000:
        key = subs[random.randint(0, len(subs) - 1)][1]
        # Index 0 is never swapped (as in the original), hence randint(1, ...).
        y = random.randint(1, len(key) - 1)
        z = random.randint(1, len(key) - 1)
        key[y], key[z] = key[z], key[y]
        score = DetectEnglish.detect(_decodeInterleaved(subs), length=length)
        if score > bestScore:
            bestScore = score
            i = 0
        else:
            # Undo the swap: it did not help.
            key[y], key[z] = key[z], key[y]
        i += 1

    return _decodeInterleaved(subs)
def transposition():
    """Render the transposition cipher page and, on POST, attempt decryption.

    On POST the form fields ``ciphInput``, ``keylenInput`` and ``keyInput``
    are read, the text is stripped of ``PUNC`` and lower-cased, and the
    result of ``Transposition.decrypt`` is scored and passed to the template.
    """
    args = {"title": "Transposition", "ciphText": "", "result": "",
            "score": 0, "keylen": "", "key": ""}
    if request.method == "POST":
        from Ciphers import Transposition

        ciphText = Format.remove(request.form["ciphInput"], PUNC).lower()
        keylen = int(request.form["keylenInput"] or 0)
        key = request.form["keyInput"]
        # The transposition ``decrypt`` in this code base returns two values
        # on some paths and three (result, key, score) on others; accept
        # both instead of failing with "too many values to unpack".
        result, key, *_ = Transposition.decrypt(ciphText, key=key, keylen=keylen)
        key = ",".join(key)
        score = DetectEnglish.detectWord(SpaceAdd.addLongest(result)) * 100
        args = {"title": "Transposition", "ciphText": ciphText,
                "result": result, "score": score, "keylen": keylen,
                "key": key}
    return render_template("ciphers/transposition.html", **args)
def substitution():
    """Render the substitution cipher page and, on POST, attempt decryption.

    The ``useSpace`` form field selects ``Substitution.decryptWithSpaces``
    over the default ``Substitution.decrypt``.
    """
    args = {"title": "Substitution", "ciphText": "", "result": "",
            "score": 0, "vals": {}}
    if request.method == "POST":
        from Ciphers import Substitution

        ciphText = Format.remove(request.form["ciphInput"], PUNC).lower()
        if request.form.get("useSpace"):
            solver = Substitution.decryptWithSpaces
        else:
            solver = Substitution.decrypt
        result, vals = solver(ciphText)
        score = DetectEnglish.detectWord(SpaceAdd.addLongest(result)) * 100
        args.update(ciphText=ciphText, result=result, score=score, vals=vals)
    return render_template(f"ciphers/substitution.html", **args)
def find(word):
    """Find anagrams of a given word.

    The word is lower-cased and reduced to ``ALPH``; every entry of the word
    list with the same sorted letters is returned, in sorted order.
    """
    with open("static/txt/wordlist.txt", encoding="utf-8") as wordFile:
        candidates = wordFile.read().split("\n")

    letters = sorted(Format.keepOnly(word.lower(), ALPH))
    return sorted(entry for entry in candidates if sorted(entry) == letters)
def decrypt(ciph, keylen=0, key=""):
    """Attempt decryption of the transposition-enciphered text.

    One of ``keylen`` or ``key`` is required. With a key, the text is
    decrypted directly. With only a key length, two layouts of the text are
    tried (as prepared by ``_process``) and the higher-scoring one is kept;
    if the text length is not a multiple of ``keylen``, the trailing
    ``len(ciph) % keylen`` characters are additionally permuted to find the
    best-scoring ending.

    Args:
        ciph: Cipher text; it is lower-cased and reduced to ``ALPH``/``NUMS``.
        keylen: Key length to search when no key is supplied.
        key: Comma-separated key.

    Returns:
        ``("", "")`` when neither ``key`` nor ``keylen`` is given; whatever
        ``_decryptWithKey`` returns when a key is given; otherwise
        ``(bestResult, bestKey, bestScore)``.

    NOTE(review): the return arity differs between paths (2 vs 3 values).
    It is left unchanged here because callers may depend on either shape;
    confirm and unify.
    """
    if not (key or keylen):
        return "", ""
    ciph = Format.keepOnly(ciph.lower(), ALPH, NUMS)
    text = _process(ciph, keylen=keylen, key=key)
    if key:
        return _decryptWithKey(text, key.split(","))

    def solve(grid):
        # Short keys and long keys use different search routines.
        if keylen < 9:
            return _decryptShortKey(grid)
        return _decryptLongKey(grid, keylen)

    def rate(candidate):
        return DetectEnglish.detectWord(SpaceAdd.addLongest(candidate))

    bestResult, bestKey = solve(text)
    bestScore = rate(bestResult)

    # Second attempt on the alternative layout of the same text.
    text = _process(ciph, keylen=len(ciph) // keylen, key=key)
    text = "".join(text)
    text = _process(text, keylen=keylen, key=key)
    result, altKey = solve(text)
    score = rate(result)
    if score > bestScore:
        # Keep the score in step with the result so the returned score is
        # never stale.
        bestResult, bestKey, bestScore = result, altKey, score

    overflow = len(ciph) % keylen
    if overflow != 0:
        bestScore = 0
        lastset = bestResult[-overflow:]
        overflow = len(lastset)
        # Only the tail changes between permutations; the prefix is fixed.
        prefix = bestResult[:-overflow]
        for perm in itertools.permutations(lastset, overflow):
            result = prefix + "".join(perm)
            score = rate(result)
            if score > bestScore:
                bestScore = score
                bestResult = result
    return bestResult, bestKey, bestScore
def addLongest(text):
    """Insert spacing into text. Longest identified words inserted first.

    The text is reduced to ``ALPH``. Word lengths are tried from
    ``DetectEnglish.getLongest()`` down to 1; every slice found in the word
    list is recorded at its start position and masked with dots so shorter
    words cannot reuse its letters. The recorded words are returned in
    positional order, joined by spaces.
    """
    with open("static/txt/wordlist.txt", encoding="utf-8") as f:
        wordset = set(f.read().split("\n"))

    string = Format.keepOnly(text, ALPH)
    result = [""] * len(string)
    maxLen = DetectEnglish.getLongest()

    for chunkSize in range(maxLen, 0, -1):
        mask = "." * chunkSize
        for i in range(len(string) - chunkSize + 1):
            chunk = string[i:i + chunkSize]
            if chunk in wordset:
                result[i] = chunk
                # Mask only this occurrence. str.replace would blank every
                # occurrence, so repeated words would vanish from the output.
                string = string[:i] + mask + string[i + chunkSize:]

    return " ".join(word for word in result if word != "")
def decrypt(ciph, key="", keylen=0):
    """Automatically decrypt a vigenere cipher, using the Index of
    Coincidence to find possible key lengths.

    A supplied ``key`` or ``keylen`` short-circuits to ``decryptWithKey`` /
    ``decryptWithKeylen``. Otherwise key lengths 2 to 25 whose columns
    average an index of coincidence above 0.06 are each solved column by
    column with ``Caesar.decrypt``, and the best-scoring result is returned
    as ``(text, key, score)``.
    """
    ciph = Format.keepOnly(ciph.lower(), ALPH)
    if key:
        return decryptWithKey(ciph, key)
    if keylen:
        return decryptWithKeylen(ciph, int(keylen))

    # Every candidate key length with its interleaved columns.
    columnsByLen = {
        size: [ciph[offset::size] for offset in range(size)]
        for size in range(2, 26)
    }

    # Keep only lengths whose columns look like natural language.
    likely = {}
    for size, columns in columnsByLen.items():
        avgic = sum(map(DetectEnglish.indexOfCoincidence, columns)) / size
        if avgic > 0.06:
            likely[size] = avgic

    bestResult, bestKey, bestScore = "", "", 0
    for size in likely:
        plainColumns = []
        shifts = []
        for column in columnsByLen[size]:
            plain, shift = Caesar.decrypt(column)
            plainColumns.append(plain)
            shifts.append(shift)
        merged = Transposition.recreate(plainColumns)
        score = DetectEnglish.detect(merged)
        if score > bestScore:
            bestScore = score
            bestKey = "".join(shifts)
            bestResult = merged

    return bestResult, ",".join(bestKey), bestScore
# Tail of the training script (the matching `if` of the first `else:` is not
# visible here).
#   1. Calls FormatClasses(filename), then loads the class labels with joblib
#      from '../Extrct/<base>ClassExtraction'.
#   2. Loads the features with joblib from
#      '../Extrct/<base>FeatureExtraction<sys.argv[2]>', calling
#      Format(filename, int(sys.argv[2])) first when the existence check fails.
#   3. Fits a sklearn DecisionTreeClassifier and pickles the fitted model to
#      '../Models/<base>DTC<sys.argv[2]>.pkl'.
# NOTE(review): str.rstrip('.txt') strips any trailing '.', 't' and 'x'
# characters, not the '.txt' suffix (e.g. 'test.txt' -> 'tes'). The same
# naming is presumably used wherever these files are written, so any change
# must be made in both places - confirm.
# NOTE(review): the os.path.isfile check uses the path WITHOUT the
# sys.argv[2] suffix, while the file opened afterwards includes it - this
# looks inconsistent; confirm.
else: FormatClasses(str(filename)) with open( '../Extrct/%sClassExtraction' % os.path.basename(sys.argv[1]).rstrip('.txt'), 'rb') as d: Classifications = joblib.load(d) if os.path.isfile('../Extrct/%sFeatureExtraction' % (str(os.path.basename(sys.argv[1]).rstrip('.txt')))): with open( '../Extrct/%sFeatureExtraction%s' % (str(os.path.basename(sys.argv[1]).rstrip('.txt')), int(sys.argv[2])), 'rb') as f: Features = joblib.load(f) else: Format(str(filename), int(sys.argv[2])) with open( '../Extrct/%sFeatureExtraction%s' % (str(os.path.basename(sys.argv[1]).rstrip('.txt')), int(sys.argv[2])), 'rb') as f: Features = joblib.load(f) clf = sklearn.tree.DecisionTreeClassifier() model = clf.fit(Features, Classifications) with open( '../Models/%sDTC%s.pkl' % (os.path.basename(sys.argv[1]).rstrip('.txt'), int(sys.argv[2])), 'wb') as oot: pickle.dump(model, oot)
def splitKey():
    """Return the posted key as comma-separated letters.

    Reads ``key`` from the JSON body of a POST request, lower-cases it,
    keeps only the characters in ``ALPH`` and returns a JSON string whose
    ``key`` entry has a comma between every character. Any other request
    method yields the string ``"error"``.
    """
    if request.method != "POST":
        return "error"

    letters = Format.keepOnly(request.json["key"].lower(), ALPH)
    separated = ",".join(list(letters))
    return json.dumps({"key": separated})
import re
import os.path

call = time.time()

# Path of the pickled classifier. The window size is the third run of digits
# in the path (the "15" in "...DTC15.pkl").
model = '../Models/globular_signal_peptide_2state.3lineDTC15.pkl'
ws = int(re.findall(r'\d+', model)[2])

# Open the same path the window size was parsed from, so the two can never
# drift apart. pickle.load can execute arbitrary code: only load model files
# you trust.
with open(model, "rb") as f:
    clf = pickle.load(f)

# Maps predicted class indices back to their one-letter labels.
SpIndexR = {0: 'S', 1: 'G'}

# First command-line argument: the file of peptides to predict.
PepstoPred = sys.argv[1]
print("formatting fasta features")
FeaturesPred = Format(PepstoPred, ws)
FeaturesPredOnTest = ParseSeqstoDict(PepstoPred)
print("format complete. Predicting peptides")
prediction = list(clf.predict(FeaturesPred))
#StatsComp = FormatClasses(PepstoPred)

# Accumulators initialised here; they are not used in this part of the script.
gg = ''
x = 0
FalseS = 0
FalseG = 0
wrong = 0
TotalS = 0
MissedS = 0
#The StatsComp expression was for internal testing - you know the classification of your fasta.
# decryptWithSpaces(ciph, keyMap="") -> (plaintext, keyMap)
#
# Indentation has been lost in this copy of the function, so the summary
# below follows the apparent statement order.
#   1. When keyMap is falsy, every ALPH letter starts with all of ALPH as its
#      candidate set. The text has PUNC removed and is lower-cased; empty
#      text returns (ciph, {letter: ""}) immediately.
#   2. For each distinct space-separated cipher word, PatternGen.pattern(cw)
#      is looked up in static/txt/patterns.json. The letters of all matching
#      words are collected per cipher letter and intersected with that
#      letter's current candidates; an empty intersection leaves the
#      candidates unchanged.
#   3. Letters with exactly one candidate count as solved; _removeSolved is
#      called repeatedly until the solved set stops changing. Candidate sets
#      that end up empty become {"_"}.
#   4. Letters are grouped by candidate count in `keylens`. Until only one
#      count remains, every key combination for the letters in keylens[1]
#      plus the second group of `keylens` is generated with
#      itertools.product and ranked by getBest; single-letter, non-"_"
#      entries of the winning map are adopted, then step 3 and the grouping
#      are repeated.
#   5. Returns (sub(ciph, keyMap), keyMap).
# NOTE(review): a keyMap passed in by the caller is modified in place during
# step 2 - confirm callers expect that.
# NOTE(review): newMap[letter] would raise KeyError for a character outside
# ALPH that survives Format.remove(ciph, PUNC) (presumably digits) - confirm.
# NOTE(review): keylens[1] would raise KeyError if no letter has exactly one
# candidate when the outer loop starts - confirm this cannot happen.
def decryptWithSpaces(ciph, keyMap=""): """ Use pattern-matching techniques to decipher a substituted alphabet. Requires properly spaced ciphertext to be effective. """ if not keyMap: keyMap = {key: set(ALPH) for key in ALPH} ciph = Format.remove(ciph, PUNC).lower() if not ciph: return ciph, {x: "" for x in ALPH} with open("static/txt/patterns.json", encoding="utf-8") as f: patterns = json.load(f) # Reformat text into set for cw in set(ciph.split(" ")): newMap = {key: set() for key in ALPH} # Match pattern to wordlist pattern = PatternGen.pattern(cw) if pattern in patterns: for word in patterns[pattern]: for i, letter in enumerate(cw): newMap[letter].add(word[i]) # Remove impossible letters for letter in set(cw): keyMap[letter] = keyMap[letter] & newMap[letter] if keyMap[ letter] & newMap[letter] else keyMap[letter] solved = set() while True: # Look for 1-length (solved) mappings oldSolved = set(solved) solved = set( next(iter(val)) for val in keyMap.values() if len(val) == 1) if oldSolved == solved: break # Remove solved letters from other possible mappings _removeSolved(keyMap, solved) keyMap = {letter: keyMap[letter] or {"_"} for letter in keyMap} keylens = {length: [] for length in map(len, keyMap.values())} for letter in keyMap: keylens[len(keyMap[letter])].append(letter) while True: if len(keylens) == 1: break poss = [] for letter in ALPH: if letter in keylens[1] + keylens[list(keylens)[1]]: poss.append(keyMap[letter]) else: poss.append({"_"}) possKeys = list("".join(x) for x in itertools.product(*poss)) _, _, bestMap = getBest(possKeys, ciph) for k, v in bestMap.items(): if len(v) == 1 and v != "_": keyMap[k] = {v} while True: # Look for 1-length (solved) mappings oldSolved = set(solved) solved = set( next(iter(val)) for val in keyMap.values() if len(val) == 1) if oldSolved == solved: break # Remove solved letters from other possible mappings _removeSolved(keyMap, solved) keylens = {length: [] for length in map(len, keyMap.values())} for letter in keyMap: 
keylens[len(keyMap[letter])].append(letter) return sub(ciph, keyMap), keyMap