def metrics(true_path, pred_path, do_rescale=False):
    true_df = pd.read_csv(true_path, index_col=["slide", "rid"]).sort_index()
    # pred_path may be a CSV path or an already-loaded prediction DataFrame
    if isinstance(pred_path, (str, Path)):
        pred_df = pd.read_csv(pred_path,
                              index_col=["slide", "rid"]).sort_index()
    else:
        pred_df = pred_path
    # The ground-truth column is named 'y' in some files and 'p' in others
    p_col = "y" if "y" in true_df.columns else "p"
    mse = mean_squared_error(true_df.loc[pred_df.index, p_col], pred_df["p"])
    pprob = predprob(true_df.loc[pred_df.index, p_col], pred_df["p"])
    print("mse={:0.3f}, predprob={:0.3f}".format(mse, pprob))
    if do_rescale:
        # Report the same metrics again after rescaling the predictions
        pred_df = rescale(pred_df)
        mse = mean_squared_error(true_df.loc[pred_df.index, p_col],
                                 pred_df["p"])
        pprob = predprob(true_df.loc[pred_df.index, p_col], pred_df["p"])
        print("mse={:0.3f}, predprob={:0.3f}".format(mse, pprob))
def evaluate(true, pred):
    mse = mean_squared_error(true, pred)
    mae = mean_absolute_error(true, pred)
    pprob = predprob(true, pred)
    print("MSE:", mse, "\nMAE:", mae, "\nPPROB:", pprob)
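# Illustrative usage sketch for the two helpers above. The arrays are
# synthetic and the CSV paths are hypothetical placeholders, not real project
# files; metrics() expects a slide/rid index plus a 'p' (or 'y') column.
def _example_evaluate_usage():
    y_true = np.array([0.10, 0.45, 0.80, 0.30])
    y_pred = np.array([0.15, 0.40, 0.70, 0.35])
    evaluate(y_true, y_pred)
    # metrics() compares two CSVs (or a pre-loaded prediction DataFrame):
    # metrics('data/true_labels.csv', 'runs/predictions.csv', do_rescale=True)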
def train_model():
    print('Start: Training Model')
    root_path = os.getcwd()
    train_path = os.path.join(root_path, *TRAINING_SPLIT_DATA_PATH)
    test_path = os.path.join(root_path, *TEST_SPLIT_DATA_PATH)
    if len(VALIDATION_SPLIT_DATA_PATH) > 0:
        val_path = os.path.join(root_path, *VALIDATION_SPLIT_DATA_PATH)
    history_name = '{}_ep{}_bs{}.csv'.format(HISTORY_PREFIX, NUM_EPOCHS,
                                             BATCH_SIZE)
    history_file = os.path.join(root_path, *HISTORY_TRAINING_PATH)
    history_file = os.path.join(history_file, history_name)

    print('Preparing Data')

    # Get training data: concatenate every .npz shard whose stem matches
    # DATA_FILE (file_name[:-6] strips a suffix such as '_0.npz')
    print('Retrieving Data for Training')
    train_features = None
    train_label = None
    data_retrieved = False
    for file_name in os.listdir(train_path):
        file_path = os.path.join(train_path, file_name)
        if os.path.isfile(file_path) and DATA_FILE == file_name[:-6]:
            data_retrieved = True
            np_file = np.load(file_path)
            if train_features is None:
                train_features = np_file['dataset']
                train_label = np_file['labels']
            else:
                train_features = np.concatenate(
                    (train_features, np_file['dataset']), axis=0)
                train_label = np.concatenate(
                    (train_label, np_file['labels']), axis=0)
    if not data_retrieved:
        print('Retrieved No Training Data')

    if len(VALIDATION_SPLIT_DATA_PATH) == 0:
        print('Splitting Training Dataset Into Training and Validation')
        x_train, x_val, y_train, y_val = data_manager.train_val_split(
            train_features, train_label, val_ratio=VALIDATION_RATIO)
    else:
        print('Retrieving Data for Validation')
        x_train = train_features
        y_train = train_label
        x_val = None
        y_val = None
        data_retrieved = False
        for file_name in os.listdir(val_path):
            file_path = os.path.join(val_path, file_name)
            if os.path.isfile(file_path) and DATA_FILE == file_name[:-6]:
                data_retrieved = True
                np_file = np.load(file_path)
                if x_val is None:
                    x_val = np_file['dataset']
                    y_val = np_file['labels']
                else:
                    x_val = np.concatenate((x_val, np_file['dataset']),
                                           axis=0)
                    y_val = np.concatenate((y_val, np_file['labels']),
                                           axis=0)
        if not data_retrieved:
            print('Retrieved No Validation Data')

    # Get testing dataset
    print('Retrieving Data for Testing')
    test_features = None
    test_label = None
    data_retrieved = False
    for file_name in os.listdir(test_path):
        file_path = os.path.join(test_path, file_name)
        if os.path.isfile(file_path) and DATA_FILE == file_name[:-6]:
            data_retrieved = True
            np_file = np.load(file_path)
            if test_features is None:
                test_features = np_file['dataset']
                test_label = np_file['labels']
            else:
                test_features = np.concatenate(
                    (test_features, np_file['dataset']), axis=0)
                test_label = np.concatenate(
                    (test_label, np_file['labels']), axis=0)
    if not data_retrieved:
        print('Retrieved No Testing Data')

    # Use the held-out test split for final evaluation
    x_test = test_features
    y_test = test_label

    print('Preparing Model')
    model_path = os.path.join(root_path, *MODELS_PATH)
    model_temp_path = os.path.join(model_path, TEMP_MODEL_DIR)
    if not os.path.exists(model_temp_path):
        os.makedirs(model_temp_path)

    if len(PRE_TRAINED_MODEL) == 0:
        print('Creating New Model')
        model_input = Input(INPUT_SHAPE)
        x = inception.build_inception_v4(
            model_input, enable_reduction=ENABLE_MODEL_REDUCTION)
        model = Model(model_input, x, name='inception_v4')
        if len(LOAD_WEIGHTS_NEW_MODEL) > 0:
            weights2load = os.path.join(root_path, *LOAD_WEIGHTS_NEW_MODEL)
            model.load_weights(weights2load)
    else:
        print('Loading Existing Model')
        pre_model_path = os.path.join(root_path, *PRE_TRAINED_MODEL)
        cus_obj = None
        if LOSS_FUNCTION == 'root_mean_squared_error':
            cus_obj = {'root_mean_squared_error': root_mean_squared_error}
        model = load_model(pre_model_path, custom_objects=cus_obj)

    if LOSS_FUNCTION == 'root_mean_squared_error':
        model.compile(loss=root_mean_squared_error,
                      optimizer=LEARNING_OPTIMIZER, metrics=['mae'])
    else:
        model.compile(loss=LOSS_FUNCTION,
                      optimizer=LEARNING_OPTIMIZER, metrics=['mae'])
    print(model.summary())

    print('Training Model')
    best_val_scores = []
    best_avrg_scores = []
    best_val_dic = {}
    best_avrg_dic = {}

    nb_mirror_aug = 1
    if ENABLE_VERTICAL_MIRROR:
        nb_mirror_aug += 1
    if ENABLE_HORIZONTAL_MIRROR:
        nb_mirror_aug += 1

    # Generate full label vectors covering every rotation and mirror
    y_val_full = []
    y_train_full = []
    for j in range(nb_mirror_aug):
        for i in range(NB_ROTATION):
            y_val_full = np.concatenate((y_val_full, y_val[:, 2]))
        for i in range(NB_ROTATION):
            y_train_full = np.concatenate((y_train_full, y_train[:, 2]))

    start = time.time()
    print('Train on {} samples, validate on {} samples'.format(
        len(y_train_full), len(y_val_full)))

    # Create the history log file
    with codecs.open(history_file, "w", encoding="utf-8") as history_csv:
        history_csv.write(
            'epoch,time,train_rmse,train_pk,val_rmse,val_pk,avg_pk\n')

    for i in range(1, NUM_EPOCHS + 1):
        epoch_start = time.time()
        print('Epoch {}/{}'.format(i, NUM_EPOCHS))
        train_preds = []
        val_preds = []
        train_rmse = 0
        for j in range(nb_mirror_aug):
            # Mirror the images if enabled:
            #   j == 0 -> original
            #   j == 1 -> vertical flip (horizontal if vertical is disabled)
            #   j == 2 -> horizontal flip (when both mirrors are enabled)
            if j == 0:
                x_mirror_train = x_train
                x_mirror_val = x_val
            elif j == 1:
                if ENABLE_VERTICAL_MIRROR:
                    x_mirror_train = np.flip(x_train, 2)
                    x_mirror_val = np.flip(x_val, 2)
                elif ENABLE_HORIZONTAL_MIRROR:
                    x_mirror_train = np.flip(x_train, 3)
                    x_mirror_val = np.flip(x_val, 3)
            elif j == 2:
                x_mirror_train = np.flip(x_train, 3)
                x_mirror_val = np.flip(x_val, 3)

            # Train and predict on each 90-degree rotation
            for k in range(NB_ROTATION):
                history = model.fit(np.rot90(x_mirror_train, k, axes=(2, 3)),
                                    y_train[:, 2], verbose=0, epochs=1,
                                    batch_size=BATCH_SIZE)
                train_rmse += history.history['loss'][0]
                temp_preds = model.predict(
                    np.rot90(x_mirror_train, k, axes=(2, 3)))
                temp_preds = temp_preds.reshape(temp_preds.shape[0])
                # temp_preds = np.nan_to_num(temp_preds)
                train_preds = np.concatenate((train_preds, temp_preds))
            for k in range(NB_ROTATION):
                temp_preds = model.predict(
                    np.rot90(x_mirror_val, k, axes=(2, 3)))
                temp_preds = temp_preds.reshape(temp_preds.shape[0])
                # temp_preds = np.nan_to_num(temp_preds)
                val_preds = np.concatenate((val_preds, temp_preds))

        train_rmse = train_rmse / (nb_mirror_aug * NB_ROTATION)
        train_pk = prediction_probability.predprob(y_train_full, train_preds)
        val_pk = prediction_probability.predprob(y_val_full, val_preds)
        val_rmse = sqrt(mean_squared_error(y_val_full, val_preds))
        epoch_end = time.time()
        # predprob may return a tuple when the score is undefined; treat as 0
        if isinstance(train_pk, tuple):
            train_pk = 0
        if isinstance(val_pk, tuple):
            val_pk = 0
        avrg_pk = (train_pk + val_pk) / 2
        print(' - {}s - train_rmse: {:.6f} - train_pk: {:.6f}'
              ' - val_rmse: {:.6f} - val_pk: {:.6f} - average_pk: {:.6f}'
              .format(int(epoch_end - epoch_start), train_rmse, train_pk,
                      val_rmse, val_pk, avrg_pk))
        with codecs.open(history_file, "a", encoding="utf-8") as history_csv:
            history_csv.write(
                '{},{},{:.6f},{:.6f},{:.6f},{:.6f},{:.6f}\n'.format(
                    i, int(epoch_end - epoch_start), train_rmse, train_pk,
                    val_rmse, val_pk, avrg_pk))

        # Track the top K models by validation RMSE.
        # val_info layout: [val_rmse, val_pk, train_pk, train_rmse, index_id]
        if len(best_val_scores) < TOP_K_MODEL_SAVE:
            # Save the first K models unconditionally
            index = len(best_val_scores)
            val_info = [val_rmse, val_pk, train_pk, train_rmse, index]
            best_val_scores.append(val_rmse)
            best_val_dic[str(val_rmse)] = val_info
            temp_best_val_weights = os.path.join(
                model_temp_path,
                'temp_best_rmse_weights{}.hdf5'.format(index))
            model.save_weights(temp_best_val_weights)
        else:
            best_val_scores.append(val_rmse)
            best_val_scores = sorted(best_val_scores)
            if best_val_scores[TOP_K_MODEL_SAVE] != val_rmse:
                # New top-K model: evict the worst and reuse its weight slot
                info2remove = best_val_dic.pop(
                    str(best_val_scores[TOP_K_MODEL_SAVE]), None)
                val_info = [val_rmse, val_pk, train_pk, train_rmse, 0]
                if info2remove is not None:
                    val_info[4] = info2remove[4]
                best_val_dic[str(val_rmse)] = val_info
                temp_best_val_weights = os.path.join(
                    model_temp_path,
                    'temp_best_rmse_weights{}.hdf5'.format(val_info[4]))
                model.save_weights(temp_best_val_weights)
            else:
                temp_best_val_weights = os.path.join(
                    model_temp_path,
                    'temp_best_rmse_weights_train_rmse({:.6f})_val-pk({:.6f})_train-pk({:.6f}).hdf5'
                    .format(train_rmse, val_pk, train_pk))
                model.save_weights(temp_best_val_weights)
            best_val_scores = best_val_scores[:-1]

        # Track the top K models by average P_K (higher is better).
        # val_info layout: [avrg_pk, val_pk, train_pk, train_rmse, index_id]
        if len(best_avrg_scores) < TOP_K_MODEL_SAVE:
            # Save the first K models unconditionally
            index = len(best_avrg_scores)
            val_info = [avrg_pk, val_pk, train_pk, train_rmse, index]
            best_avrg_scores.append(avrg_pk)
            best_avrg_dic[str(avrg_pk)] = val_info
            temp_best_avrg_weights = os.path.join(
                model_temp_path,
                'temp_best_avrg_weights{}.hdf5'.format(index))
            model.save_weights(temp_best_avrg_weights)
        else:
            best_avrg_scores.append(avrg_pk)
            best_avrg_scores = sorted(best_avrg_scores, reverse=True)
            if best_avrg_scores[TOP_K_MODEL_SAVE] != avrg_pk:
                # New top-K model: evict the worst and reuse its weight slot
                info2remove = best_avrg_dic.pop(
                    str(best_avrg_scores[TOP_K_MODEL_SAVE]), None)
                val_info = [avrg_pk, val_pk, train_pk, train_rmse, 0]
                if info2remove is not None:
                    val_info[4] = info2remove[4]
                best_avrg_dic[str(avrg_pk)] = val_info
                temp_best_avrg_weights = os.path.join(
                    model_temp_path,
                    'temp_best_avrg_weights{}.hdf5'.format(val_info[4]))
                model.save_weights(temp_best_avrg_weights)
            else:
                temp_best_avrg_weights = os.path.join(
                    model_temp_path,
                    'temp_best_avrg_weights_train_rmse({:.6f})_val-pk({:.6f})_train-pk({:.6f}).hdf5'
                    .format(train_rmse, val_pk, train_pk))
                model.save_weights(temp_best_avrg_weights)
            best_avrg_scores = best_avrg_scores[:-1]

    end = time.time()
    print('Training Time: {}s'.format(int(end - start)))

    '''
    print('Saving Last Model:')
    final_model_name = 'model_final_e({})_bs({})_mr({})_train_rmse({:.6f})_train-pk({:.6f})_val-rmse({:.6f})_val-pk({:.6f}).hdf5'.format(
        NUM_EPOCHS, BATCH_SIZE, ENABLE_MODEL_REDUCTION, train_rmse, train_pk,
        val_rmse, val_pk)
    model.save(os.path.join(model_path, final_model_name))
    '''

    print('Evaluating Model')

    # Write a prediction log (y_true vs. y_pred, one row per sample)
    def log_preds(y, y_, file):
        with codecs.open(file, "w", encoding="utf-8") as pred_csv:
            pred_csv.write('y_true,y_pred\n')
            for l in range(len(y)):
                pred_csv.write('{},{}\n'.format(y[l], y_[l]))

    # Write the summary log header on first use
    log_file = os.path.join(root_path, *SUMMARY_TRAINING_LOG)
    if not os.path.exists(log_file):
        with codecs.open(log_file, "a", encoding="utf-8") as log_csv:
            log_csv.write(
                'model_version,nb_epochs,i,batch_size,nb_rotations,'
                'mirror_vertical,mirror_horizontal,model_reduction,'
                'test_rmse,test_pk,val_pk,train_pk,train_rmse\n')

    # Create a directory to save all top-K models
    save_model_path = os.path.join(
        model_path,
        'top_models-ep({})-bs({})-r({})-rm({})-rd({})'.format(
            NUM_EPOCHS, BATCH_SIZE, NB_ROTATION,
            ENABLE_HORIZONTAL_MIRROR or ENABLE_VERTICAL_MIRROR,
            ENABLE_MODEL_REDUCTION))
    if not os.path.exists(save_model_path):
        os.makedirs(save_model_path)

    # Create a directory to save all prediction logs
    pred_path = os.path.join(root_path, *HISTORY_TRAINING_PATH)
    pred_path = os.path.join(
        pred_path,
        '{}_ep{}_bs{}_r({})_rm({})_mr({})'.format(
            PREDICTION_LOG, NUM_EPOCHS, BATCH_SIZE, NB_ROTATION,
            ENABLE_HORIZONTAL_MIRROR or ENABLE_VERTICAL_MIRROR,
            ENABLE_MODEL_REDUCTION))
    if not os.path.exists(pred_path):
        os.makedirs(pred_path)

    # Evaluate the best validation-RMSE models on the test split
    for best_val_score in best_val_dic.values():
        itr = best_val_score[4]
        temp_best_val_weights = os.path.join(
            model_temp_path, 'temp_best_rmse_weights{}.hdf5'.format(itr))
        model.load_weights(temp_best_val_weights)
        val_preds = model.predict(x_test)
        val_preds = val_preds.reshape(val_preds.shape[0])
        val_preds = np.nan_to_num(val_preds)
        p_k = prediction_probability.predprob(y_test[:, 2], val_preds)
        test_rmse_val = sqrt(mean_squared_error(y_test[:, 2], val_preds))
        print('Test P_K Score (Val Model {}):'.format(itr))
        val_str_pk = '{:.6f}'.format(p_k)
        print(val_str_pk)
        best_model_name = (
            'model_rmse_e({})_bs({})_mr({})_i({})_test-rmse({:.6f})'
            '_train_rmse({:.6f})_test-pk({})_val-pk({:.6f})_train-pk({:.6f}).hdf5'
            .format(NUM_EPOCHS, BATCH_SIZE, ENABLE_MODEL_REDUCTION, itr,
                    test_rmse_val, best_val_score[3], val_str_pk,
                    best_val_score[1], best_val_score[2]))
        model.save(os.path.join(save_model_path, best_model_name))
        pred_name_file = '{}_val_ep{}_bs{}_mr({})_i({}).csv'.format(
            PREDICTION_LOG, NUM_EPOCHS, BATCH_SIZE, ENABLE_MODEL_REDUCTION,
            itr)
        pred_file = os.path.join(pred_path, pred_name_file)
        log_preds(y_test[:, 2], val_preds, pred_file)
        with codecs.open(log_file, "a", encoding="utf-8") as log_csv:
            log_csv.write(
                'rmse,{},{},{},{},{},{},{},{:.6f},{},{:.6f},{:.6f},{:.6f}\n'
                .format(NUM_EPOCHS, itr, BATCH_SIZE, NB_ROTATION,
                        ENABLE_VERTICAL_MIRROR, ENABLE_HORIZONTAL_MIRROR,
                        ENABLE_MODEL_REDUCTION, test_rmse_val, val_str_pk,
                        best_val_score[1], best_val_score[2],
                        best_val_score[3]))

    # Evaluate the best average-P_K models on the test split
    for best_avrg_score in best_avrg_dic.values():
        itr = best_avrg_score[4]
        temp_best_avg_weights = os.path.join(
            model_temp_path, 'temp_best_avrg_weights{}.hdf5'.format(itr))
        model.load_weights(temp_best_avg_weights)
        avg_preds = model.predict(x_test)
        avg_preds = avg_preds.reshape(avg_preds.shape[0])
        avg_preds = np.nan_to_num(avg_preds)
        p_k = prediction_probability.predprob(y_test[:, 2], avg_preds)
        test_rmse_avg = sqrt(mean_squared_error(y_test[:, 2], avg_preds))
        print('Test P_K Score (Avg Model {}):'.format(itr))
        avg_str_pk = '{:.6f}'.format(p_k)
        print(avg_str_pk)
        best_model_name = (
            'model_average_e({})_bs({})_mr({})_i({})_test-rmse({:.6f})'
            '_train_rmse({:.6f})_test-pk({})_val-pk({:.6f})_train-pk({:.6f}).hdf5'
            .format(NUM_EPOCHS, BATCH_SIZE, ENABLE_MODEL_REDUCTION, itr,
                    test_rmse_avg, best_avrg_score[3], avg_str_pk,
                    best_avrg_score[1], best_avrg_score[2]))
        model.save(os.path.join(save_model_path, best_model_name))
        pred_name_file = '{}_avg_ep{}_bs{}_mr({})_i({}).csv'.format(
            PREDICTION_LOG, NUM_EPOCHS, BATCH_SIZE, ENABLE_MODEL_REDUCTION,
            itr)
        pred_file = os.path.join(pred_path, pred_name_file)
        log_preds(y_test[:, 2], avg_preds, pred_file)
        with codecs.open(log_file, "a", encoding="utf-8") as log_csv:
            log_csv.write(
                'average,{},{},{},{},{},{},{},{:.6f},{},{:.6f},{:.6f},{:.6f}\n'
                .format(NUM_EPOCHS, itr, BATCH_SIZE, NB_ROTATION,
                        ENABLE_VERTICAL_MIRROR, ENABLE_HORIZONTAL_MIRROR,
                        ENABLE_MODEL_REDUCTION, test_rmse_avg, avg_str_pk,
                        best_avrg_score[1], best_avrg_score[2],
                        best_avrg_score[3]))

    print('Done: Training Model')
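# A minimal, self-contained sketch of the mirror/rotation scheme used in the
# training loop above, assuming channels-first (N, C, H, W) batches as implied
# by axes=(2, 3). The helper name and the dummy shape are illustrative
# assumptions, not part of the original code.
def _example_dihedral_augmentations(batch, nb_rotation=4, vertical=True,
                                    horizontal=True):
    """Yield every mirrored and rotated view of a (N, C, H, W) batch."""
    views = [batch]
    if vertical:
        views.append(np.flip(batch, 2))  # mirror along the height axis
    if horizontal:
        views.append(np.flip(batch, 3))  # mirror along the width axis
    for view in views:
        for k in range(nb_rotation):
            yield np.rot90(view, k, axes=(2, 3))  # k quarter-turns in H-W

# e.g. list(_example_dihedral_augmentations(np.zeros((2, 3, 32, 32)))) yields
# 3 mirrors x 4 rotations = 12 views, matching nb_mirror_aug * NB_ROTATION.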
def predict_test_data():
    print('Start: Predicting Test Data')
    print('Preparing Data')
    root_path = os.getcwd()
    test_path = os.path.join(root_path, *TEST_DATA_PATH)

    # Concatenate every .npz shard whose name contains DATA_FILE
    test_features = None
    ids = None
    for file_name in os.listdir(test_path):
        file_path = os.path.join(test_path, file_name)
        if os.path.isfile(file_path) and DATA_FILE in file_name:
            np_file = np.load(file_path)
            if test_features is None:
                test_features = np_file['dataset']
                ids = np_file['labels']
            else:
                test_features = np.concatenate(
                    (test_features, np_file['dataset']), axis=0)
                ids = np.concatenate((ids, np_file['labels']), axis=0)

    print('Load Model')
    model_path = os.path.join(root_path, *MODELS_PATH)
    pred_model_name = os.path.join(model_path, PREDICT_MODEL)
    # NOTE: assumes PREDICT_MODEL was not saved with the custom RMSE loss;
    # pass custom_objects to load_model otherwise.
    predict_model = load_model(pred_model_name)

    print('Preparing Predictions')
    preds = predict_model.predict(test_features)
    preds = preds.reshape(preds.shape[0])

    # Index predictions by 'slide_rid' for the template lookup below
    pred_dic = {}
    for i in range(len(ids)):
        slide = str(int(ids[i, 0]))
        rid = str(int(ids[i, 1]))
        pred_dic['{}_{}'.format(slide, rid)] = preds[i]

    print('Calculating P_K Score')
    p_k = prediction_probability.predprob(ids[:, 2], preds)
    print('P_K Score:')
    if not isinstance(p_k, tuple):
        str_pk = '{:.6f}'.format(p_k)
    else:
        str_pk = 'nan'
    print(str_pk)

    print('Generating Submission File')
    data_rows = []
    template = os.path.join(root_path, *OUTPUT_TEMPLATE)
    with codecs.open(template, "r", encoding="utf-8") as temp_file:
        reader = csv.reader(temp_file)
        next(reader, None)  # skip the header row
        for row in reader:
            slide = row[0]
            rid = row[1]
            info = '{}_{}'.format(slide, rid)
            p = pred_dic.get(info, '0')
            data_rows.append('{},{},{}\n'.format(slide, rid, p))

    output_file = os.path.join(root_path, *OUTPUT_FILE)
    with codecs.open(output_file, "w", encoding="utf-8") as submission_file:
        submission_file.write('slide,rid,p\n')
        for row in data_rows:
            submission_file.write(row)
    print('Done: Predicting Test Data')
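# The loaders above expect .npz shards holding a 'dataset' array and a
# 'labels' array whose rows appear to be (slide, rid, target), given the
# ids[:, 0] / ids[:, 1] / ids[:, 2] indexing. A sketch of writing and reading
# a compatible shard; the shapes and file name are illustrative assumptions.
def _example_npz_shard_roundtrip(path='example_shard_0.npz'):
    dataset = np.zeros((8, 3, 64, 64), dtype=np.float32)  # (N, C, H, W)
    labels = np.zeros((8, 3), dtype=np.float32)           # (slide, rid, target)
    np.savez(path, dataset=dataset, labels=labels)
    shard = np.load(path)
    return shard['dataset'].shape, shard['labels'].shape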
# Evaluation-only counterpart to train_model(): it rebuilds the network and
# reloads the data splits, then scores the weight checkpoints already sitting
# in the temp model directory instead of running the training loop.
def evaluate_saved_models():
    print('Start: Evaluating Saved Models')
    root_path = os.getcwd()
    train_path = os.path.join(root_path, *TRAINING_SPLIT_DATA_PATH)
    test_path = os.path.join(root_path, *TEST_SPLIT_DATA_PATH)
    if len(VALIDATION_SPLIT_DATA_PATH) > 0:
        val_path = os.path.join(root_path, *VALIDATION_SPLIT_DATA_PATH)
    history_name = '{}_ep{}_bs{}.csv'.format(HISTORY_PREFIX, NUM_EPOCHS,
                                             BATCH_SIZE)
    history_file = os.path.join(root_path, *HISTORY_TRAINING_PATH)
    history_file = os.path.join(history_file, history_name)

    print('Preparing Data')

    # Get training data (loaded for parity with train_model(); only the test
    # split is actually scored below)
    print('Retrieving Data for Training')
    train_features = None
    train_label = None
    data_retrieved = False
    for file_name in os.listdir(train_path):
        file_path = os.path.join(train_path, file_name)
        if os.path.isfile(file_path) and DATA_FILE == file_name[:-6]:
            data_retrieved = True
            np_file = np.load(file_path)
            if train_features is None:
                train_features = np_file['dataset']
                train_label = np_file['labels']
            else:
                train_features = np.concatenate(
                    (train_features, np_file['dataset']), axis=0)
                train_label = np.concatenate(
                    (train_label, np_file['labels']), axis=0)
    if not data_retrieved:
        print('Retrieved No Training Data')

    if len(VALIDATION_SPLIT_DATA_PATH) == 0:
        print('Splitting Training Dataset Into Training and Validation')
        x_train, x_val, y_train, y_val = data_manager.train_val_split(
            train_features, train_label, val_ratio=VALIDATION_RATIO)
    else:
        print('Retrieving Data for Validation')
        x_train = train_features
        y_train = train_label
        x_val = None
        y_val = None
        data_retrieved = False
        for file_name in os.listdir(val_path):
            file_path = os.path.join(val_path, file_name)
            if os.path.isfile(file_path) and DATA_FILE == file_name[:-6]:
                data_retrieved = True
                np_file = np.load(file_path)
                if x_val is None:
                    x_val = np_file['dataset']
                    y_val = np_file['labels']
                else:
                    x_val = np.concatenate((x_val, np_file['dataset']),
                                           axis=0)
                    y_val = np.concatenate((y_val, np_file['labels']),
                                           axis=0)
        if not data_retrieved:
            print('Retrieved No Validation Data')

    # Get testing dataset
    print('Retrieving Data for Testing')
    test_features = None
    test_label = None
    data_retrieved = False
    for file_name in os.listdir(test_path):
        file_path = os.path.join(test_path, file_name)
        if os.path.isfile(file_path) and DATA_FILE == file_name[:-6]:
            data_retrieved = True
            np_file = np.load(file_path)
            if test_features is None:
                test_features = np_file['dataset']
                test_label = np_file['labels']
            else:
                test_features = np.concatenate(
                    (test_features, np_file['dataset']), axis=0)
                test_label = np.concatenate(
                    (test_label, np_file['labels']), axis=0)
    if not data_retrieved:
        print('Retrieved No Testing Data')

    # Use the held-out test split for final evaluation
    x_test = test_features
    y_test = test_label

    print('Preparing Model')
    model_path = os.path.join(root_path, *MODELS_PATH)
    model_temp_path = os.path.join(model_path, TEMP_MODEL_DIR)
    if not os.path.exists(model_temp_path):
        os.makedirs(model_temp_path)

    if len(PRE_TRAINED_MODEL) == 0:
        print('Creating New Model')
        model_input = Input(INPUT_SHAPE)
        x = inception.build_inception_v4(
            model_input, enable_reduction=ENABLE_MODEL_REDUCTION)
        model = Model(model_input, x, name='inception_v4')
        if len(LOAD_WEIGHTS_NEW_MODEL) > 0:
            print('Loading Weights From Prior Training Run')
            weights2load = os.path.join(root_path, *LOAD_WEIGHTS_NEW_MODEL)
            model.load_weights(weights2load)
    else:
        print('Loading Existing Model')
        pre_model_path = os.path.join(root_path, *PRE_TRAINED_MODEL)
        cus_obj = None
        if LOSS_FUNCTION == 'root_mean_squared_error':
            cus_obj = {'root_mean_squared_error': root_mean_squared_error}
        model = load_model(pre_model_path, custom_objects=cus_obj)
    if LOSS_FUNCTION == 'root_mean_squared_error':
        model.compile(loss=root_mean_squared_error,
                      optimizer=LEARNING_OPTIMIZER, metrics=['mae'])
    else:
        model.compile(loss=LOSS_FUNCTION,
                      optimizer=LEARNING_OPTIMIZER, metrics=['mae'])
    print(model.summary())

    print('Evaluating Model')

    # Write a prediction log (y_true vs. y_pred, one row per sample)
    def log_preds(y, y_, file):
        with codecs.open(file, "w", encoding="utf-8") as pred_csv:
            pred_csv.write('y_true,y_pred\n')
            for l in range(len(y)):
                pred_csv.write('{},{}\n'.format(y[l], y_[l]))

    # Write the summary log header on first use
    log_file = os.path.join(root_path, *SUMMARY_TRAINING_LOG)
    if not os.path.exists(log_file):
        with codecs.open(log_file, "a", encoding="utf-8") as log_csv:
            log_csv.write(
                'model_version,nb_epochs,i,batch_size,nb_rotations,'
                'mirror_vertical,mirror_horizontal,model_reduction,'
                'test_rmse,test_pk\n')

    # Create a directory to save all top-K models
    save_model_path = os.path.join(
        model_path,
        'top_models-test_only_ep({})-bs({})-r({})-rm({})-rd({})'.format(
            NUM_EPOCHS, BATCH_SIZE, NB_ROTATION,
            ENABLE_HORIZONTAL_MIRROR or ENABLE_VERTICAL_MIRROR,
            ENABLE_MODEL_REDUCTION))
    if not os.path.exists(save_model_path):
        os.makedirs(save_model_path)

    # Create a directory to save all prediction logs
    pred_path = os.path.join(root_path, *HISTORY_TRAINING_PATH)
    pred_path = os.path.join(
        pred_path,
        '{}_test_only_ep{}_bs{}_r({})_rm({})_mr({})'.format(
            PREDICTION_LOG, NUM_EPOCHS, BATCH_SIZE, NB_ROTATION,
            ENABLE_HORIZONTAL_MIRROR or ENABLE_VERTICAL_MIRROR,
            ENABLE_MODEL_REDUCTION))
    if not os.path.exists(pred_path):
        os.makedirs(pred_path)

    # Evaluate every indexed checkpoint ('temp_best_<kind>_weights<N>.hdf5')
    # in the temp directory; both prefixes are 22 characters long, so the
    # slice [22:-5] isolates the index and the isdigit() check skips the
    # descriptive fallback files that embed metrics in their names.
    for file_name in os.listdir(model_temp_path):
        file_path = os.path.join(model_temp_path, file_name)
        if not os.path.isfile(file_path):
            continue
        if (file_name.startswith('temp_best_rmse_weights')
                and file_name[22:-5].isdigit()):
            itr = int(file_name[22:-5])
            model.load_weights(file_path)
            val_preds = model.predict(x_test)
            val_preds = val_preds.reshape(val_preds.shape[0])
            val_preds = np.nan_to_num(val_preds)
            p_k = prediction_probability.predprob(y_test[:, 2], val_preds)
            test_rmse_val = sqrt(mean_squared_error(y_test[:, 2], val_preds))
            print('Test P_K Score (Val Model {}):'.format(itr))
            val_str_pk = '{:.6f}'.format(p_k)
            print(val_str_pk)
            best_model_name = (
                'model_rmse_e({})_bs({})_mr({})_i({})_test-rmse({:.6f})'
                '_test-pk({}).hdf5'.format(
                    NUM_EPOCHS, BATCH_SIZE, ENABLE_MODEL_REDUCTION, itr,
                    test_rmse_val, val_str_pk))
            model.save(os.path.join(save_model_path, best_model_name))
            pred_name_file = '{}_val_ep{}_bs{}_mr({})_i({}).csv'.format(
                PREDICTION_LOG, NUM_EPOCHS, BATCH_SIZE,
                ENABLE_MODEL_REDUCTION, itr)
            pred_file = os.path.join(pred_path, pred_name_file)
            log_preds(y_test[:, 2], val_preds, pred_file)
            with codecs.open(log_file, "a", encoding="utf-8") as log_csv:
                log_csv.write(
                    'rmse,{},{},{},{},{},{},{},{:.6f},{}\n'.format(
                        NUM_EPOCHS, itr, BATCH_SIZE, NB_ROTATION,
                        ENABLE_VERTICAL_MIRROR, ENABLE_HORIZONTAL_MIRROR,
                        ENABLE_MODEL_REDUCTION, test_rmse_val, val_str_pk))
        elif (file_name.startswith('temp_best_avrg_weights')
                and file_name[22:-5].isdigit()):
            itr = int(file_name[22:-5])
            model.load_weights(file_path)
            avg_preds = model.predict(x_test)
            avg_preds = avg_preds.reshape(avg_preds.shape[0])
            avg_preds = np.nan_to_num(avg_preds)
            p_k = prediction_probability.predprob(y_test[:, 2], avg_preds)
            test_rmse_avg = sqrt(mean_squared_error(y_test[:, 2], avg_preds))
            print('Test P_K Score (Avg Model {}):'.format(itr))
            avg_str_pk = '{:.6f}'.format(p_k)
            print(avg_str_pk)
            best_model_name = (
                'model_average_e({})_bs({})_mr({})_i({})_test-rmse({:.6f})'
                '_test-pk({}).hdf5'.format(
                    NUM_EPOCHS, BATCH_SIZE, ENABLE_MODEL_REDUCTION, itr,
                    test_rmse_avg, avg_str_pk))
            model.save(os.path.join(save_model_path, best_model_name))
            pred_name_file = '{}_avg_ep{}_bs{}_mr({})_i({}).csv'.format(
                PREDICTION_LOG, NUM_EPOCHS, BATCH_SIZE,
                ENABLE_MODEL_REDUCTION, itr)
            pred_file = os.path.join(pred_path, pred_name_file)
            log_preds(y_test[:, 2], avg_preds, pred_file)
            with codecs.open(log_file, "a", encoding="utf-8") as log_csv:
                log_csv.write(
                    'average,{},{},{},{},{},{},{},{:.6f},{}\n'.format(
                        NUM_EPOCHS, itr, BATCH_SIZE, NB_ROTATION,
                        ENABLE_VERTICAL_MIRROR, ENABLE_HORIZONTAL_MIRROR,
                        ENABLE_MODEL_REDUCTION, test_rmse_avg, avg_str_pk))

    print('Done: Evaluating Saved Models')
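# The checkpoint files parsed above follow 'temp_best_<kind>_weights<N>.hdf5'.
# A regex-based parse is a sketch of a sturdier alternative to slicing fixed
# offsets; the helper name is hypothetical. It returns (kind, index) for the
# indexed checkpoints and None for the descriptive fallback files.
import re

_CHECKPOINT_RE = re.compile(r'^temp_best_(rmse|avrg)_weights(\d+)\.hdf5$')

def _example_parse_checkpoint_name(file_name):
    match = _CHECKPOINT_RE.match(file_name)
    if match is None:
        return None
    return match.group(1), int(match.group(2))

# e.g. _example_parse_checkpoint_name('temp_best_avrg_weights3.hdf5')
# -> ('avrg', 3)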