def draw_one(model_path, x, y, pacient, win_len):
    """
    Plot the signal of one patient together with the reference and the
    predicted annotation, then print the statistics and the F-score.

    Only the central window of the record is shown; records are sliced as if
    they were 5000 samples long. Reference masks are drawn as bands on a -50
    baseline, predicted masks as bands on a 0 baseline.

    :param model_path: path passed to ``load_model``
    :param x: array of signals indexed as [patient, sample, channel]
    :param y: array of annotations indexed as [patient, sample, class]
    :param pacient: index of the patient to plot
    :param win_len: number of samples in the displayed window
    """
    offsets = (5000 - win_len) // 2
    model = load_model(model_path)
    X = np.expand_dims(x[pacient, :, :], axis=0)
    Y = np.expand_dims(y[pacient, offsets:5000 - offsets, :], axis=0)
    prediction = np.array(model.predict(X))
    prediction = prediction[:, offsets:5000 - offsets, :]

    # Built from integer sample indices: np.arange with a fractional step may
    # return one element more or less than expected because of rounding.
    # Dividing by 500 presumably converts samples to seconds -- TODO confirm.
    x_axis = np.arange(offsets, offsets + win_len) / 500

    plt.figure(figsize=(20, 5))
    plt.plot(x_axis, x[pacient, offsets:5000 - offsets, 0], 'k')

    # The batch holds a single record, hence index 0.
    predict_rounded = np.argmax(prediction, axis=2)[0]
    # One column per output class of the model, so that a class which is
    # never predicted still has an (all-zero) column to plot instead of
    # raising IndexError.
    one_hot = np.eye(prediction.shape[2])[predict_rounded]

    bands = ((1, 'r'), (2, 'g'), (0, 'b'))
    # reference annotation
    for channel, color in bands:
        plt.fill_between(x_axis, Y[0, :win_len, channel] * 40 + -50, -50,
                         color=color, alpha=0.3)
    # predicted annotation
    for channel, color in bands:
        plt.fill_between(x_axis, list(one_hot[:win_len, channel] * 40), 0,
                         color=color, alpha=0.3)

    stat = statistics(Y, prediction)
    F = F_score(stat)
    print(stat)
    print(F)
    plt.show()
def ranging(model_path, x, y, win_len, col="k", model_pred=None):
    """
    plot a scattergram of F1 score for each patient

    :param model_path: path passed to ``load_model``; only used when
        model_pred is None
    :param x: array of input records; its length sets the number of patients
    :param y: array of annotations indexed as [patient, sample, class]
    :param win_len: length of the scored window; records are sliced as if
        they were 5000 samples long
    :param col: colour passed to ``plt.scatter``
    :param model_pred: optional precomputed prediction for x (full length,
        it is sliced here); when given, no model is loaded
    :return: tuple of F1 scores, ordered by patient index
    """
    offsets = (5000 - win_len) // 2
    Y = y[:, offsets:5000 - offsets, :]

    # Identity check: `== None` on a numpy array compares element-wise and
    # the resulting array cannot be used as an `if` condition.
    if model_pred is None:
        model = load_model(model_path)
        prediction = np.array(model.predict(x))
    else:
        prediction = model_pred
    prediction = prediction[:, offsets:5000 - offsets, :]

    scores = []
    for i in range(len(x)):
        stat = statistics(np.expand_dims(Y[i, :, :], axis=0),
                          np.expand_dims(prediction[i, :, :], axis=0))
        scores.append(F_score(stat))
    scores = tuple(scores)

    plt.scatter(tuple(range(len(scores))), scores, c=col, alpha=0.3)
    plt.show()
    return scores
def trim(model, xtrain, ytrain, name, threshold, path_to_data, win_len):
    """
    removes from xtrain, ytrain elements on which the model has F1 greater
    than or equal to threshold, and pickles the result

    :param model: object whose ``predict`` is applied to xtrain
    :param xtrain: array of training inputs
    :param ytrain: array of annotations indexed as [record, sample, class]
    :param name: suffix of the output file name, ``trim_<name>.pkl``
    :param threshold: records scoring at least this F1 are dropped
    :param path_to_data: path to the folder where the trimmed dataset will be saved
    :param win_len: scored samples are win_len // 2 : 5000 - win_len // 2
    :return: trimmed dataset as a pair (xtrain_new, ytrain_new)
    """
    pred_train = np.array(model.predict(xtrain))
    start = win_len // 2
    stop = 5000 - win_len // 2

    # Collect the indices first and delete once: deleting inside the loop
    # re-copies both arrays for every removed record.
    removed = []
    for i in range(len(xtrain)):
        pred = pred_train[i, start:stop, :]
        y = ytrain[i, start:stop, :]
        stat = statistics(np.expand_dims(y, axis=0), np.expand_dims(pred, axis=0))
        F = F_score(stat)
        if F >= threshold:
            removed.append(i)

    removed = np.array(removed, dtype=int)
    xtrain_new = np.delete(xtrain, removed, axis=0)
    ytrain_new = np.delete(ytrain, removed, axis=0)

    if not os.path.exists(path_to_data):
        os.makedirs(path_to_data)
    # NOTE(review): the separator is a literal backslash, so the file lands
    # inside path_to_data only on Windows -- confirm before running elsewhere.
    # The context manager closes the file even if pickling fails.
    with open(path_to_data + "\\trim_" + name + ".pkl", 'wb') as outfile:
        pkl.dump({"x": xtrain_new, "y": ytrain_new}, outfile)
    return xtrain_new, ytrain_new
def histogram(model_paths_list, x, y, win_len, threshold=0.99):
    """
    Count, for every model, the records of x scored at or above threshold.

    The model number is read from the file name: the text after the
    ``ens_model_`` prefix with the last three characters removed.

    :param model_paths_list: paths of the saved models
    :param x: input records
    :param y: annotations indexed as [record, sample, class]
    :param win_len: scored samples are win_len // 2 : 5000 - win_len // 2
    :param threshold: minimal F-score for a record to be counted
    :return: dictionary {model number: number of counted records}
    """
    prefix_len = len("ens_model_")
    scored = slice(win_len // 2, 5000 - win_len // 2)
    counts = {}

    for model_file in model_paths_list:
        base_name = split(model_file)[1]
        number = int(base_name[prefix_len:-3])
        counts[number] = 0

        outputs = np.array(load_model(model_file).predict(x))
        for idx in range(len(x)):
            expected = np.expand_dims(y[idx, scored, :], axis=0)
            produced = np.expand_dims(outputs[idx, scored, :], axis=0)
            if F_score(statistics(expected, produced)) >= threshold:
                counts[number] += 1

    return counts
def draw_all(model_path, x, y, win_len, model2=None):
    """
    Save one annotated plot per record of x as ``ill<i>.png``.

    Each figure shows channel 0 of the signal restricted to the central
    window, the reference annotation as bands on a -50 baseline, the
    prediction of the model at model_path as bands on a 0 baseline and, when
    model2 is given, the prediction of that second model on a +50 baseline.
    The statistics and F-score of the first model over the whole of x are
    printed once per record.

    :param model_path: path passed to ``load_model`` for the main model
    :param x: array of signals indexed as [record, sample, channel]
    :param y: array of annotations indexed as [record, sample, class]
    :param win_len: number of samples in the displayed window; records are
        sliced as if they were 5000 samples long
    :param model2: optional path of a second model to draw for comparison
    """
    offsets = (5000 - win_len) // 2
    model = load_model(model_path)
    X = x
    Y = y[:, offsets:5000 - offsets, :]
    prediction = np.array(model.predict(X))
    prediction = prediction[:, offsets:5000 - offsets, :]

    prediction2 = None
    if model2 is not None:
        second_model = load_model(model2)
        prediction2 = np.array(second_model.predict(X))[:, offsets:5000 - offsets, :]

    # Built from integer sample indices: np.arange with a fractional step may
    # return one element more or less than expected because of rounding.
    x_axis = np.arange(offsets, offsets + win_len) / 500
    bands = ((1, 'r'), (2, 'g'), (0, 'b'))

    # Loop-invariant work, done once instead of once per record.
    labels = np.argmax(prediction, axis=2)
    labels2 = None if prediction2 is None else np.argmax(prediction2, axis=2)
    stat = statistics(Y, prediction)
    F = F_score(stat)

    for i in range(len(X)):
        fig = plt.figure(figsize=(20, 5))
        plt.plot(x_axis, x[i, offsets:5000 - offsets, 0], 'k')

        # One column per output class of the model, so that a class which is
        # never predicted for this record still has an (all-zero) column.
        one_hot = np.eye(prediction.shape[2])[labels[i]]

        for channel, color in bands:
            plt.fill_between(x_axis, Y[i, :win_len, channel] * 40 + -50, -50,
                             color=color, alpha=0.3)
        for channel, color in bands:
            plt.fill_between(x_axis, list(one_hot[:win_len, channel] * 40), 0,
                             color=color, alpha=0.3)

        if labels2 is not None:
            one_hot = np.eye(prediction2.shape[2])[labels2[i]]
            for channel, color in bands:
                plt.fill_between(x_axis, list(one_hot[:win_len, channel] * 40 + 50), 50,
                                 color=color, alpha=0.3)

        print(stat)
        print(F)
        plt.savefig("ill" + str(i) + ".png")
        # Close rather than clear: a cleared figure stays registered with
        # pyplot, so one figure per record would accumulate in memory.
        plt.close(fig)
def trim(model, xtrain, ytrain, data_name, threshold, path_to_data, win_len):
    """
    Drop the training records the model already handles well and pickle the
    remaining ones.

    A record is dropped when its F-score is greater than or equal to
    threshold.

    :param model: object whose ``predict`` is applied to xtrain
    :param xtrain: array of training inputs
    :param ytrain: array of annotations indexed as [record, sample, class]
    :param data_name: suffix of the output file name, ``trim_<data_name>.pkl``
    :param threshold: records scoring at least this F-score are dropped
    :param path_to_data: folder the pickle is written to; it is not created here
    :param win_len: scored samples are win_len // 2 : 5000 - win_len // 2
    :return: pair (xtrain_new, ytrain_new) without the dropped records
    """
    pred_train = np.array(model.predict(xtrain))
    start = win_len // 2
    stop = 5000 - win_len // 2

    # Gather the indices and delete in one call; deleting row by row copies
    # both arrays again for every dropped record.
    dropped = []
    for i in range(len(xtrain)):
        pred = pred_train[i, start:stop, :]
        y = ytrain[i, start:stop, :]
        stat = statistics(np.expand_dims(y, axis=0), np.expand_dims(pred, axis=0))
        if F_score(stat) >= threshold:
            dropped.append(i)

    dropped = np.array(dropped, dtype=int)
    xtrain_new = np.delete(xtrain, dropped, axis=0)
    ytrain_new = np.delete(ytrain, dropped, axis=0)

    # NOTE(review): the separator is a literal backslash, so the file lands
    # inside path_to_data only on Windows -- confirm before running elsewhere.
    # The context manager closes the file even if pickling fails.
    with open(path_to_data + "\\trim_" + data_name + ".pkl", 'wb') as outfile:
        pkl.dump({"x": xtrain_new, "y": ytrain_new}, outfile)
    return xtrain_new, ytrain_new
def ranging(model_path, x, y, win_len, col="k", is_path=True):
    """
    Scatter-plot the F-score of every record and return the scores.

    The figure is drawn but not shown; the caller decides when to display it.

    :param model_path: path passed to ``load_model`` when is_path is true,
        otherwise the already computed prediction for x
    :param x: input records; its length sets the number of points
    :param y: annotations indexed as [record, sample, class]
    :param win_len: length of the scored window; records are sliced as if
        they were 5000 samples long
    :param col: colour passed to ``plt.scatter``
    :param is_path: selects how model_path is interpreted
    :return: tuple of F-scores ordered by record index
    """
    margin = (5000 - win_len) // 2
    window = slice(margin, 5000 - margin)
    reference = y[:, window, :]

    if is_path:
        raw_prediction = np.array(load_model(model_path).predict(x))
    else:
        raw_prediction = model_path
    predicted = raw_prediction[:, window, :]

    # Pairs are produced in ascending index order, which is the order the
    # points are plotted and the scores are returned in.
    pairs = [
        (
            idx,
            F_score(
                statistics(
                    np.expand_dims(reference[idx, :, :], axis=0),
                    np.expand_dims(predicted[idx, :, :], axis=0),
                )
            ),
        )
        for idx in range(len(x))
    ]

    indices, scores = zip(*pairs)
    plt.scatter(indices, scores, c=col, alpha=0.3)
    return scores
def histogram(model_paths_list, x, y, win_len, threshold=0.99):
    """
    returns a dictionary: {model number: number of patients from x with F1 score >= threshold}

    The model number is taken from the file name, between the ``ens_model_``
    prefix and the last three characters.

    :param model_paths_list: list of paths to the saved models
    :param x: dataset
    :param y: GT annotation
    :param win_len: scored samples are win_len // 2 : 5000 - win_len // 2
    :param threshold: minimal F1 score for a patient to be counted
    """
    first = win_len // 2
    last = 5000 - win_len // 2
    result = {}

    for model_file in model_paths_list:
        file_name = split(model_file)[1]
        key = int(file_name[len("ens_model_"):-3])
        result[key] = 0

        model_output = np.array(load_model(model_file).predict(x))

        def patient_score(n):
            # F1 of a single patient, computed on a batch of size one
            truth = np.expand_dims(y[n, first:last, :], axis=0)
            guess = np.expand_dims(model_output[n, first:last, :], axis=0)
            return F_score(statistics(truth, guess))

        result[key] += sum(
            1 for n in range(len(x)) if patient_score(n) >= threshold
        )

    return result
join(path_to_ensemble_models, f) for f in listdir(path_to_ensemble_models) if isfile(join(path_to_ensemble_models, f)) ] xy = load_dataset() X = xy["x"] Y = xy["y"] offsets = (5000 - win_len) // 2 xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.33, random_state=42) model = load_model(path_to_ensemble_models + "\\ens_model_1.h5") pred_e = ensemble_predict(model_paths_list, xtest) pred_ = model.predict(xtest) stat = statistics(ytest[:, win_len // 2:5000 - win_len // 2, :], pred_e[:, win_len // 2:5000 - win_len // 2, :]) print(F_score(stat)) #stat.to_csv("stats_one_test.csv", sep = ';') ranging(pred_e, xtest, ytest, win_len, col="k", is_path=False) plt.show() dict = histogram(model_paths_list, xtrain, ytrain, win_len, threshold=0.99) plt.bar(list(dict.keys()), dict.values(), color='g', alpha=0.5) plt.show() plot_two_prediction(pred_e, pred_, xtest, ytest, win_len, [5])
def RUN():
    """
    Train every model listed in ``arr_models`` on one train/test split and
    store the results of the experiment.

    The results folder (named after the experiment) is deleted if it already
    exists, recreated, and used as the working directory for the duration of
    the run; the previous working directory is restored afterwards, also when
    the run fails. Inside that folder the function writes one
    ``stats_<model description>.txt`` per trained model and appends the table
    of summaries to ``<experiment name>.txt``. A model whose creation or
    training raises is logged and skipped.
    """
    experiment_res_name = "experiment_convolutions_results_LONG"

    # Single-model subset; it is not referenced below.
    arr_models1 = {model18.make_model: "model 18 - 8 layers (32x8) 30 h lstm"}

    # models taking part in the experiment
    arr_models = {
        model1.make_model: "model 1 - 1 layer (32x8) 50 h lstm",
        #model2.make_model: "model 2 - 1 layer (32x8) 30 h lstm",
        model3.make_model: "model 3 - 2 layers (32x8) 30 h lstm",
        model4.make_model: "model 4 - 3 layers (32x8) 30 h lstm",
        #model5.make_model: "model 5- 4 layers (16X5orx3) 30 h lstm",
        model6.make_model: "model 6 - 4 layers (32x8) 30 h lstm",
        model7.make_model: "model 7 - 4 layers (64x8) 30 h lstm---2",
        model8.make_model: "model 8 - 5 layers (32x8) 30 h lstm",
        model9.make_model: "model 9 - 6 layers (32x8) 30 h lstm",
        model10.make_model: "model 10- 7 layers (32x8) 30 h lstm",
        #model11.make_model: "model 11 - 7 layers (16x5) 15 h lstm",
        #model12.make_model: "model 12 - 7 layers 8x8 30 h lstm",
        #model13.make_model: "model 13 - 1 layer (32x8) 80 h lstm",
        model14.make_model: "model 14 - 1 layer (32x8) 60 h lstm",
        model15.make_model: "model 15 - 8 layers (8x8) 30 h lstm",
        model16.make_model: "model 16 - 7 layers (32X8) 30 h lstm---2",
        model17.make_model: "model 17 - 9 layers (32x8) 30 h lstm",
        model18.make_model: "model 18 - 8 layers (32x8) 30 h lstm"
    }

    # Configured before the directory change, so log.log is opened relative
    # to the directory the function was called from.
    logging.basicConfig(filename='log.log', level=logging.DEBUG)
    logging.info(experiment_res_name)

    xy = load_dataset()
    X = xy["x"]
    Y = xy["y"]

    # create a separate folder for the results of the experiment and make it
    # the working directory for the duration of the run
    cwd = os.getcwd()
    if os.path.exists(experiment_res_name) and os.path.isdir(
            experiment_res_name):
        shutil.rmtree(experiment_res_name)
    os.makedirs(experiment_res_name)
    os.chdir(experiment_res_name)

    try:
        # common parameters in all models:
        win_len = 3072
        batch_size = 25
        epochs = 30

        arr_summaries = []
        stats_dict = {}
        xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.33,
                                                        random_state=42)

        # iterate through all the models...
        for make_model, model_description in arr_models.items():
            try:
                model = make_model()
                history = train(model,
                                model_name=model_description,
                                x_test=xtest,
                                x_train=xtrain,
                                y_test=ytest,
                                y_train=ytrain,
                                win_len=win_len,
                                batch_size=batch_size,
                                epochs=epochs)
                # values of the last epoch
                summary = {
                    "model_name": model_description,
                    "loss": history.history['loss'][-1],
                    "val_loss": history.history['val_loss'][-1],
                    "PPV": history.history['PPV'][-1],
                    "val_PPV": history.history['val_PPV'][-1],
                    "se": history.history['Se'][-1],
                    "val_se": history.history['val_Se'][-1]
                }
            except Exception:
                # Best effort: one failing model must not stop the experiment.
                # logging.exception records the traceback with the message.
                logging.exception("ERROR OCCURED IN MODEL " + model_description)
                continue

            logging.info(str(summary))
            arr_summaries.append(summary)

            pred_test = np.array(model.predict(xtest))
            # NOTE(review): the scored range 1000:4000 is fixed and does not
            # follow win_len -- confirm this is intended.
            stats = statistics(ytest[:, 1000:4000], pred_test[:, 1000:4000]).round(4)
            stats_dict[model_description] = stats
            stats.to_csv("stats_" + model_description + '.txt')
            print(stats)

        # save results into file:
        table_summaries = pd.DataFrame(arr_summaries)
        table_summaries.to_csv(experiment_res_name + '.txt',
                               header=True,
                               index=True,
                               sep='\t',
                               mode='a')
        logging.info(
            "STATISTICS------------------------------------------------------")
        logging.info(stats_dict)
        print(table_summaries)
    finally:
        # Restore the caller's working directory even if the run failed.
        os.chdir(cwd)
def draw_one(model_path, x, y, patients, win_len):
    """
    print F1_score, plot ECG annotation of the network and ground truth

    One figure is shown per requested patient: the signal (channel 0) of the
    central window, the reference annotation as bands on a -50 baseline and
    the predicted annotation as bands on a 0 baseline.

    :param model_path: path to the trained model
    :param x: array of ECG, indexed as [patient, sample, channel]
    :param y: array of annotation, indexed as [patient, sample, class]
    :param patients: list of patients numbers to be plotted
    :param win_len: number of samples in the displayed window; records are
        sliced as if they were 5000 samples long
    """
    offsets = (5000 - win_len) // 2
    # Loaded once; the model does not depend on the patient.
    model = load_model(model_path)
    # Built from integer sample indices: np.arange with a fractional step may
    # return one element more or less than expected because of rounding.
    x_axis = np.arange(offsets, offsets + win_len) / 500
    bands = ((1, 'r'), (2, 'g'), (0, 'b'))

    for pacient in patients:
        X = np.expand_dims(x[pacient, :, :], axis=0)
        Y = np.expand_dims(y[pacient, offsets:5000 - offsets, :], axis=0)
        prediction = np.array(model.predict(X))
        prediction = prediction[:, offsets:5000 - offsets, :]

        plt.figure(figsize=(20, 5))
        plt.plot(x_axis, x[pacient, offsets:5000 - offsets, 0], 'k')

        # The batch holds exactly one record, so its row is 0 whatever the
        # patient number is; indexing by the patient number fails for any
        # patient other than 0.
        predict_rounded = np.argmax(prediction, axis=2)[0]
        # One column per output class of the model, so that a class which is
        # never predicted still has an (all-zero) column to plot.
        one_hot = np.eye(prediction.shape[2])[predict_rounded]

        for channel, color in bands:
            plt.fill_between(x_axis, Y[0, :win_len, channel] * 40 + -50, -50,
                             color=color, alpha=0.3)
        for channel, color in bands:
            plt.fill_between(x_axis, list(one_hot[:win_len, channel] * 40), 0,
                             color=color, alpha=0.3)

        stat = statistics(Y, prediction)
        F = F_score(stat)
        print(stat)
        print(F)
        plt.show()
# Every regular file found in the models folder is treated as a model path.
folder_entries = (join(path_to_models, f) for f in listdir(path_to_models))
model_paths_list = [entry for entry in folder_entries if isfile(entry)]

xy = load_dataset()
X, Y = xy["x"], xy["y"]
xtrain, xtest, ytrain, ytest = train_test_split(X,
                                                Y,
                                                test_size=0.33,
                                                random_state=42)

# The judge is trained first; the judged ensemble below is given the path
# 'judge_model0' inside the ensemble folder.
train_judge(model_paths_list, xtest, xtrain, ytest, ytrain, 0)
judge_path = path_to_ensemble_models + '\\judge_model0'
pred_j = ensemble_predict_with_judge(model_paths_list, judge_path, xtest)
pred_e = ensemble_predict(model_paths_list, xtest)

# Both ensembles are scored on the same range of samples.
scored = slice(win_len // 2, 5000 - win_len // 2)

stat_j = statistics(ytest[:, scored, :], pred_j[:, scored, :])
print("ensemble with judge:")
print(stat_j)

stat_e = statistics(ytest[:, scored, :], pred_e[:, scored, :])
print("simple ensemble:")
print(stat_e)

#ranging(pred_e, xtest, ytest, win_len, col= "k", is_path = False)
#plt.savefig("ensmodel"+str(i)+".jpg")
#plt.clf()