import re
import string
from operator import sub
from os import listdir
from os.path import isfile, join

import joblib
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from read_pics import get_pics_from_file


def new_trained_classifier(save=False, filename_model='classifier.model',
                           noise_mean=None, with_specials=True):
    data, res = [], []
    dico, _ = get_dico()  # project helper: maps key names to integer labels
    for f in listdir('../data'):
        filename = str(join('../data', f))
        match = re.search(r'pics_(.+?)\.bin', f)
        # Skip anything that does not follow the pics_<KEY>.bin naming scheme
        if match is None or not isfile(filename):
            continue
        letter = match.group(1)
        if f != 'pics_LOGINMDP.bin' \
                and (with_specials
                     or letter in string.digits
                     or letter in string.ascii_uppercase
                     or letter == "NOKEY"
                     or letter == "ENTER"):
            sgl, infos = get_pics_from_file(filename)
            if noise_mean is not None:
                # Denoise each trame by subtracting the mean no-key trame
                sgl = np.apply_along_axis(sub, 1, sgl, noise_mean)
            sgl = np.array(sgl)
            if not with_specials:
                # Drop pics 4 and 5 when special keys are excluded
                sgl = np.delete(sgl, [4, 5], 1)
            data += list(sgl)
            res += [dico[letter]] * len(sgl)
    X_train, X_test, y_train, y_test = train_test_split(data, res, test_size=0.3, shuffle=True)
    clf = RandomForestClassifier(n_estimators=30, random_state=1, n_jobs=4)
    clf.fit(X_train, y_train)
    if save:
        joblib.dump(clf, filename_model)
    print(clf.score(X_test, np.array(y_test)))
    return clf
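# Usage sketch (hypothetical): train once and persist the model, then
# reload it with joblib on later runs instead of retraining.
clf = new_trained_classifier(save=True, filename_model="clf.model",
                             noise_mean=None, with_specials=True)
# Later runs can skip training entirely:
clf = joblib.load("clf.model")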
def __init__(self, filename, key):
    tab_pics, info = get_pics_from_file(filename)
    self.tab = tab_pics
    self.passage_vect = np.zeros(info["nb_trames"])
    self.trame_seen = 0
    self.key = from_key_to_vect(key)
    self.nb_trames = info["nb_trames"]
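# from_key_to_vect() is not shown in this section. A minimal sketch,
# assuming it one-hot encodes the key name against a fixed, ordered list
# of key names (the list below is hypothetical):
import numpy as np

KEY_NAMES = [chr(c) for c in range(ord('A'), ord('Z') + 1)] + ["NOKEY", "ENTER"]

def from_key_to_vect(key):
    vect = np.zeros(len(KEY_NAMES))
    vect[KEY_NAMES.index(key)] = 1.0
    return vect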
def predict_log_mdp_keys(log_filename, mlp):
    log_keys = []
    pics_log, _ = get_pics_from_file(log_filename)
    for trame in pics_log:
        # scikit-learn estimators expect a 2-D array: wrap the single
        # trame in a list and unwrap the single prediction
        key_index = mlp.predict([trame])[0]
        key = from_index_to_key(key_index)
        log_keys.append(key)
    return log_keys
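# from_index_to_key() is not shown either. A plausible sketch, assuming
# get_dico() returns (name -> label, label -> name) dicts as the main
# script below uses them:
def from_index_to_key(index):
    _, inv_dico = get_dico()
    return inv_dico[index]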
def plot_encodage(characters_list, noise_trame, title=""):
    res = []
    for c in characters_list:
        trames, infos = get_pics_from_file(f'../data/pics_{c}.bin')
        trames = np.array(trames)
        # Denoise each trame, then average them into one signature per key
        trames = np.apply_along_axis(sub, 1, trames, noise_trame)
        trames_moyenne_br = np.mean(trames, axis=0)
        res.append(trames_moyenne_br)
    plt.matshow(np.array(res))
    plt.title(title)
    plt.yticks(range(len(characters_list)), characters_list)
    plt.show()
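# Hypothetical wrappers matching the plot_encodage_lettres(),
# plot_encodage_chiffres() and plot_encodage_spes() calls in the main
# script further down; the actual helpers may pick different character
# lists or titles.
import string

def plot_encodage_lettres():
    plot_encodage(list(string.ascii_uppercase), noise_trame(), title="Letters")

def plot_encodage_chiffres():
    plot_encodage(list(string.digits), noise_trame(), title="Digits")

def plot_encodage_spes():
    plot_encodage(["CTRL", "ENTER", "SHIFT", "SPACE", "SUPPR"],
                  noise_trame(), title="Special keys")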
def get_prediction_list(model):
    """
    `model` is a fitted estimator, e.g. `model = LinearRegression()`
    (or any other algorithm).
    Predicts the characters of the capture with the model and drops
    every 'NOKEY' prediction.
    """
    x_pred, info = get_pics_from_file("../tohack/pics_LOGINMDP.bin")
    res = []
    predictions = model.predict(x_pred)
    for prediction in predictions:
        if prediction != 'NOKEY':
            res.append(prediction)
    return res
def get_prediction_list_keras(model, d_list, pic="tohack/pics_LOGINMDP"):
    """
    `model` is a fitted estimator, e.g. `model = LinearRegression()`
    (or any other algorithm).
    Predicts the characters of the capture with the model.
    """
    x_pred, info = get_pics_from_file(f"../{pic}.bin")
    res = []
    preds_brut = model.predict(np.array(x_pred))
    for pred in preds_brut:
        # Index of the highest class probability
        itemindex = np.where(pred == max(pred))[0][0]
        prediction = d_list[itemindex]
        res.append(prediction)
    return res
def get_prediction_list2_keras(model, d_list, pic="tohack/pics_LOGINMDP"):
    """
    Like get_prediction_list_keras, but returns the two most likely keys
    for each trame.
    """
    x_pred, info = get_pics_from_file(f"../{pic}.bin")
    res = []
    preds_brut = model.predict(np.array(x_pred))
    for pred in preds_brut:
        # Indices of the two highest class probabilities
        itemindex1 = np.where(pred == max(pred))[0][0]
        pred[itemindex1] = 0
        itemindex2 = np.where(pred == max(pred))[0][0]
        prediction = (d_list[itemindex1], d_list[itemindex2])
        # A tuple never equals 'NOKEY', so test the top prediction itself
        if "data" in pic or prediction[0] != 'NOKEY':
            res.append(prediction)
    return res
def get_dataset():
    """
    Returns the train and test datasets as two dicts:
        key:   name of the key
        value: list of spikes
    """
    train_dataset = {}
    test_dataset = {}
    os.chdir('../data/')
    for filename in os.listdir():
        list_of_spikes, info = get_pics_from_file(filename)
        # The key name is the filename without the leading "pics_" and
        # the trailing ".bin"
        key = filename[5:-4]
        if key in "ABCDEFGHIJKLMNOPQRSTUVWXYZ":
            key = key.lower()
        train_dataset[key] = list_of_spikes[:6000]
        test_dataset[key] = list_of_spikes[6000:]
    return train_dataset, test_dataset
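# Usage sketch: build the 6000/rest split once, then iterate per key.
train_dataset, test_dataset = get_dataset()
for key, spikes in train_dataset.items():
    print(key, len(spikes), "training spikes")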
import numpy as np
from read_pics import get_pics_from_file
from PIL import Image

if __name__ == '__main__':
    pics_pad0, info = get_pics_from_file("../data/pics_NOKEY.bin")
    pics_a, info = get_pics_from_file("../data/pics_LOGINMDP.bin")
    # One row of grey pixels per trame, one column per pic
    img = Image.new('L', (info["nb_pics"], len(pics_pad0)))
    pixels = []
    for i, frame in enumerate(pics_a):
        noise_frame = pics_pad0[i]
        # Distinct inner index so it does not shadow the frame index
        for j, p in enumerate(frame):
            result = p - noise_frame[j]
            if result < 0:
                result = 0
            pixels.append(int(result * 255))
    img.putdata(pixels)
    img.save("file.png")
import matplotlib.pyplot as plt
from read_pics import get_pics_from_file
import numpy as np
import pywt

"""
Wavelet transform on the H key
"""

if __name__ == "__main__":
    pics_pad0, info = get_pics_from_file("../input/Hackaton/data/pics_H.bin")
    # x positions for the two halves of the DWT output (approximation
    # and detail coefficients)
    x_approx = [i for i in range(9)]
    x_detail = [i for i in range(9, 18)]
    dwt_x = [x_approx, x_detail]
    figure, axis = plt.subplots(7, 7)
    for i in range(20):
        # FFT output is complex; plot its magnitude
        axis[i // 7, i % 7].plot(range(1, info["nb_pics"] + 1),
                                 np.abs(np.fft.fft(pics_pad0[i])), 'ko')
    plt.xlabel('pic number')
    plt.ylabel('pic value')
    plt.title('key H')
    plt.ylim(0, 1.5)
    plt.grid(True, which='both')
    for i in range(20):
        axis[(i + 28) // 7, (i + 28) % 7].plot(dwt_x, pywt.dwt(pics_pad0[i], 'db1'), 'ko')
    plt.xlabel('pic number')
    plt.ylabel('pic value')
    plt.title('key H')
    plt.ylim(0, 1.5)
    plt.grid(True, which='both')
    plt.show()
def noise_trame():
    trames_noise, _ = get_pics_from_file('../data/pics_NOKEY.bin')
    return np.mean(trames_noise, axis=0)
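# Usage sketch: subtract the average no-key trame from a capture to
# denoise it; NumPy broadcasting does the same job as apply_along_axis.
noise = noise_trame()
trames, _ = get_pics_from_file('../data/pics_LOGINMDP.bin')
denoised = np.array(trames) - noise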
dico_main, inv_dico_main = get_dico()

# Plot the mean trames as a function of the key they represent
plot_encodage_lettres()
plot_encodage_chiffres()
plot_encodage_spes()

# Create, train, and save two new classifiers
# clf_without_spe = new_trained_classifier(save=True, filename_model="clf_without_spe.model",
#                                          with_specials=False, noise_mean=noise_trame())
# clf_spe = new_trained_classifier(save=True, filename_model="clf.model",
#                                  with_specials=True, noise_mean=noise_trame())

# Load the classifiers from the saved files
clf_without_spe = get_trained_model(filename_model="clf_without_spe.model")
clf_spe = get_trained_model(filename_model="clf.model")

trames, infos = get_pics_from_file('../data/pics_LOGINMDP.bin')
trames = np.array(trames)
trames = np.apply_along_axis(sub, 1, trames, noise_trame())
trames_without_spe = np.delete(trames, [4, 5], 1)

res = []
res_without_spe = []
# Classify the capture in chunks of 200 trames and keep, for each chunk,
# the four most frequently predicted keys
for chk in np.split(trames, range(200, len(trames), 200)):
    histo = {k: 0 for k, _ in dico_main.items()}
    predictions = clf_spe.predict(chk)
    for prediction in predictions:
        histo[inv_dico_main[prediction]] += 1
    res.append(sorted(histo.items(), key=lambda item: item[1], reverse=True)[:4])
def solve():
    login, _ = get_pics_from_file("./data/pics_LOGINMDP.bin")
    noise, _ = get_pics_from_file("./data/pics_NOKEY.bin")
    mean_noise = np.mean(noise, axis=0)
    login_denoised = np.subtract(login, mean_noise)
    login_denoised_smooth = ndimage.gaussian_filter1d(login_denoised, 10, 0)

    # Split the input into segments: cluster the smoothed trames, then
    # cut wherever two consecutive trames fall into different clusters
    ac = AgglomerativeClustering(compute_full_tree=True, distance_threshold=9, n_clusters=None)
    yac = ac.fit_predict(login_denoised_smooth)
    print(f"Cluster count: {ac.n_clusters_}")
    chunks_index = [0]
    for i in range(yac.size - 1):
        if yac[i] != yac[i + 1]:
            chunks_index.append(i)
    chunks_index.append(yac.size)
    chunks = [(x, y) for x, y in zip(chunks_index[:-1], chunks_index[1:])]
    print(len(chunks))

    keys = []
    # Load all key data
    for key_file in glob.glob("./data/pics_*.bin"):
        if "LOGINMDP" in key_file or "NOKEY" in key_file:
            continue
        key = key_file.split("pics_")[1].replace(".bin", "")
        pics_cur, info = get_pics_from_file(key_file)
        key_mean_denoise = np.mean(np.subtract(pics_cur, mean_noise), axis=0)
        keys.append(Key(key, key_mean_denoise))
    # The all-zero "NOKEY" entry lets a chunk match a single key press
    second_keys = keys + [Key("NOKEY", np.zeros(len(mean_noise), dtype=np.double))]

    selected_keys = []  # List of the selected best keys
    # Associate key(s) to a chunk: try every pair of key signatures and
    # keep the ones whose summed mean is closest to the chunk mean
    for low, high in chunks:
        chunk_mean = np.mean(login_denoised[low:high], axis=0)
        min_score = float("inf")
        best_keys = []
        for first_key in keys:
            for second_key in second_keys:
                score = np.linalg.norm(chunk_mean - (first_key.mean + second_key.mean))
                if score < min_score:
                    min_score = score
                    best_keys.append((score, [first_key, second_key]))
        best_keys = sorted(best_keys, key=lambda a: a[0])
        print(f"chunks[{low},{high}]:")
        for i in range(min(3, len(best_keys))):
            candidate = best_keys[i]
            print(f"  Candidate {i+1} (score: {candidate[0]}):", end="")
            presses = []
            for key in candidate[1]:
                if key.name != "NOKEY":
                    print(key.name, end=" ")
                    presses.append(key.name)
            if i == 0:
                selected_keys.append("+".join(presses))
            print("")
    print(f"Selected keys: {', '.join(selected_keys)}")
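# solve() expects a Key container holding a name and the denoised mean
# trame. A minimal sketch consistent with that usage (the real class may
# carry more state):
from dataclasses import dataclass

import numpy as np


@dataclass
class Key:
    name: str
    mean: np.ndarray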
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
import seaborn as sns
from data_clean import detect_outliers
from scipy.fft import fft
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB
from read_pics import get_pics_from_file

if __name__ == "__main__":
    alphanum = "abcdefghijklmnopqrstuvwxyz0123456789"
    touchesspe = ["CTRL", "ENTER", "NOKEY", "SHIFT", "SPACE", "SUPPR"]
    n = 0
    i = 0
    df = pd.DataFrame()
    df_1 = pd.DataFrame()
    for c in alphanum:
        pics, info = get_pics_from_file("../input/Hackaton/data/pics_" + c + ".bin")
        n += len(pics)
        ff = []
        df_test = generate_df(pics, c)  # project helper (not shown here)
        df_temp = detect_outliers(df_test)
        df_temp['label'] = c
        # DataFrame.append was removed in pandas 2.0; use pd.concat instead
        df = pd.concat([df, df_temp])
        df_1 = pd.concat([df_1, df_test])
        i += 1
    for c in touchesspe:
        pics, info = get_pics_from_file("../input/Hackaton/data/pics_" + c + ".bin")
        n += len(pics)
        df_test = generate_df(pics, c)
        df_temp = detect_outliers(df_test)
import numpy as np
from read_pics import get_pics_from_file
from PIL import Image

if __name__ == '__main__':
    pics_pad0, info = get_pics_from_file("../data/pics_NOKEY.bin")
    pics_a, info = get_pics_from_file("../data/pics_CTRL.bin")
    img = Image.new('L', (info["nb_pics"], len(pics_pad0)))
    pixels = []
    for i, frame in enumerate(pics_a):
        noise_frame = pics_pad0[i]
        # Distinct inner index so it does not shadow the frame index
        for j, p in enumerate(frame):
            result = p - noise_frame[j]
            if result < 0:
                result = 0
            pixels.append(int(result * 255))
    img.putdata(pixels)
    img.save("file.png")