def feature_reduction():
    """Render pre-processing step 5 (p-value / fold-change feature reduction).

    Reads the pre-process id from the query string, builds a p-value /
    fold-change dataframe — from the averaged-symbol dataframe when one
    exists, otherwise straight from the uploaded file — caches it in the
    user's tmp folder for the follow-up POST, stores the volcano-plot hash,
    and renders the step-5 template with candidate cut-off grids.

    Returns:
        A rendered ``preprocess/step-5.html`` response, or a redirect to
        ``/pre`` when the pre-process id is unknown.
    """
    pre_process_id = request.args.get("id")
    pre_process = UserData.get_preprocess_from_id(pre_process_id)
    if pre_process is None:
        return redirect('/pre')

    if pre_process['avg_symbol_df_path']:
        avg_symbol_df_path = Path(pre_process['avg_symbol_df_path'])
        file_path = Path(pre_process['file_path'])
        p_fold_df = PreProcess.get_pvalue_fold_df(avg_symbol_df_path,
                                                  file_path)
    else:
        # From step1: no averaged-symbol dataframe yet — compute p-values
        # and fold changes directly from the raw upload.
        file_path = Path(pre_process['file_path'])
        p_fold_df = PreProcess.get_pvalue_fold_df(file_path)

    # Cache the p-value/fold dataframe so get_reduce_features_from_pvalues
    # can reload it without recomputing.
    p_fold_df_path = USER_PATH / str(
        g.user["id"]) / 'tmp' / ('_p_fold_' + pre_process['file_name'])
    PreProcess.saveDF(p_fold_df, p_fold_df_path)

    # Candidate cut-off grids offered to the user: a fixed p-value range
    # and a fold range scaled to 20% of the observed maximum.
    # NOTE: a dead `pvalues_max = p_fold_df['pValues'].max() * 0.1`
    # assignment was removed here — it was never used.
    fold_max = p_fold_df['fold'].max() * 0.2
    pvalues = np.around(np.linspace(0.001, 0.01, 19), decimals=4)
    folds = np.around(np.linspace(0.001, fold_max, 40), decimals=4)
    data_array = [pvalues, folds]

    volcano_hash = get_volcano_fig(p_fold_df['fold'], p_fold_df['pValues'])
    UserData.update_preprocess(pre_process['user_id'],
                               pre_process['file_name'], 'volcano_hash',
                               volcano_hash)

    return render_template("preprocess/step-5.html", data_array=data_array,
                           volcano_hash=volcano_hash,
                           pre_process_id=pre_process_id)
def norm():
    """Render pre-processing step 4 (scaling and missing-value imputation).

    Reads the chosen normalisation method, null-removal strategy and
    pre-process id from the POST form, records both choices, runs step3 on
    the merged (or raw, transposed) dataframe, saves the averaged-symbol
    result to the user's tmp folder and renders a preview table.

    Returns:
        A rendered ``preprocess/step-4.html`` response, or a redirect to
        ``/pre`` when a form field is missing or the id is unknown.
    """
    norm_method = request.form.get("norm_mthd")
    null_rmv = request.form.get("null_rmv")
    pre_process_id = request.form.get("id")

    # Guard: all three form fields are required.
    if not (norm_method and null_rmv and pre_process_id):
        return redirect('/pre')

    pre_process = UserData.get_preprocess_from_id(pre_process_id)
    if pre_process is None:
        return redirect('/pre')

    user_id = pre_process['user_id']
    UserData.update_preprocess(user_id, pre_process['file_name'],
                               'scaling', norm_method)
    UserData.update_preprocess(user_id, pre_process['file_name'],
                               'imputation', null_rmv)

    if pre_process['merge_df_path'] == '':
        # No merged dataframe yet: load the raw upload, drop the class
        # column and transpose so features become rows.
        frame = PreProcess.getDF(Path(pre_process['file_path']))
        frame = frame.drop(['class'], axis=1).T.reset_index()
    else:
        frame = PreProcess.getDF(Path(pre_process['merge_df_path']))

    frame = PreProcess.step3(frame, norm_method, null_rmv)

    # Persist the averaged-symbol dataframe for the later steps.
    avg_symbol_name = "avg_symbol_" + pre_process['file_name']
    avg_symbol_df_path = USER_PATH / str(
        g.user["id"]) / "tmp" / avg_symbol_name
    avg_symbol_df_path_str = avg_symbol_df_path.as_posix()
    PreProcess.saveDF(frame, avg_symbol_df_path_str)
    UserData.update_preprocess(user_id, pre_process['file_name'],
                               'avg_symbol_df_path', avg_symbol_df_path_str)

    # Refresh the per-file details stored in the session.
    data = session[pre_process['file_name']]
    data = PreProcess.add_details_json(data, frame, "r1")
    session[pre_process['file_name']] = data

    # Preview: at most the first 100 columns and the first 15 rows.
    wide = len(frame.columns) > 100
    df_view = (frame.iloc[:, 0:100] if wide else frame).head(15)

    return render_template("preprocess/step-4.html",
                           tablesstep4=[df_view.to_html(classes='data')],
                           details=data, pre_process_id=pre_process_id,
                           file_name=avg_symbol_name)
def get_reduce_features_from_pvalues():
    """Render pre-processing step 6: filter features by the chosen cut-offs.

    Reads the user-selected fold-change and p-value thresholds from the POST
    form, filters the cached p-value/fold dataframe accordingly, saves the
    reduced dataframe, runs the classifier comparison on it and renders the
    step-6 template with the results table, the feature-selection figure and
    the candidate feature-count split points.
    """
    fold = request.form["fold-range"]
    pvalue = request.form["p-value"]
    pre_process_id = request.form["id"]
    pre_process = UserData.get_preprocess_from_id(pre_process_id)
    # Reload the p-value/fold dataframe cached by feature_reduction (step 5).
    p_fold_df_path = USER_PATH / str(
        g.user["id"]) / 'tmp' / ('_p_fold_' + pre_process['file_name'])
    p_fold_df = PreProcess.getDF(p_fold_df_path)
    if pre_process['avg_symbol_df_path']:
        df = PreProcess.get_filtered_df_pvalue(
            p_fold_df, pre_process['avg_symbol_df_path'],
            float(pvalue), float(fold))
    else:
        # From step1 skip: filter the raw upload instead (trailing 0 flag —
        # presumably "not averaged"; semantics live in PreProcess).
        df = PreProcess.get_filtered_df_pvalue(p_fold_df,
                                               pre_process['file_path'],
                                               float(pvalue), float(fold), 0)
    # Persist the feature-reduced dataframe for the later steps.
    fr_df_path = USER_PATH / str(
        g.user["id"]) / 'tmp' / ('fr_' + pre_process['file_name'])
    PreProcess.saveDF(df, fr_df_path)
    # Build candidate feature-count split points, shaped by how many
    # features survived the filter.
    length = len(df.columns)
    if length <= 150:
        split_array = np.array([length])
    elif length < 350:
        split_array = np.arange(150, int(length / 10) * 10, 10)
    else:
        split_array = np.linspace(150, 350, 21)
    split_array = split_array.astype(int)
    # Get classification Results
    df_y = PreProcess.getDF(Path(pre_process['file_path']))
    y = df_y['class']
    y = pd.to_numeric(y)
    classification_result_df = FeatureReduction.get_classification_results(
        df, y)
    cls_id, cls_name = FeatureReduction.get_best_cls(classification_result_df)
    # Shape the results table for display: drop the training column, sort by
    # test accuracy, index by classifier name and relabel the column.
    classification_result_df = classification_result_df.drop(['Training'],
                                                             axis=1)
    classification_result_df = classification_result_df.sort_values(
        by=['Testing'], ascending=False)
    classification_result_df = classification_result_df.set_index(
        ['Classifiers'])
    classification_result_df.index.name = None
    classification_result_df = classification_result_df.rename(
        columns={"Testing": "Testing Accuracy /%"})
    fs_fig_hash = get_feature_selection_fig(df, df_y, length)
    # Record the reduced dataframe path and the best classifier id.
    UserData.update_preprocess(pre_process['user_id'],
                               pre_process['file_name'], 'reduce_df_path',
                               fr_df_path.as_posix())
    UserData.update_preprocess(pre_process['user_id'],
                               pre_process['file_name'], 'classifiers',
                               cls_id)
    return render_template(
        "preprocess/step-6.html",
        split_array=split_array, fs_fig_hash=fs_fig_hash,
        tables=[classification_result_df.to_html(classes='data')],
        cls_names=cls_name, pre_process_id=pre_process_id)