def save_reduced_df():
    features_count = request.form['features_count']
    pre_process_id = request.form['id']
    pre_process = UserData.get_preprocess_from_id(pre_process_id)

    df = PreProcess.getDF(Path(pre_process['reduce_df_path']))
    df_y = PreProcess.getDF(Path(pre_process['file_path']))
    y = pd.to_numeric(df_y['class'])

    df_selected = FeatureReduction.getSelectedFeatures(df, int(features_count), y)

    file_name = pre_process['file_name']
    path = USER_PATH / str(g.user["id"]) / ('GeNet_' + file_name)
    PreProcess.saveDF(df_selected, path)

    # Remove old intermediate files from the tmp folder
    files = ["merge_" + file_name, "avg_symbol_" + file_name,
             "_p_fold_" + file_name, "fr_" + file_name]
    folder_path = USER_PATH / str(g.user["id"]) / "tmp"
    remove_files(folder_path, files)

    session[file_name] = None
    return redirect('/fs/?id=' + str(pre_process_id))
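
# remove_files() is called above but not defined in this module. A minimal
# hedged sketch of what it presumably does (delete each named temp file,
# ignoring ones that are missing); named *_sketch so it does not clash with
# the real helper imported elsewhere:
from pathlib import Path

def remove_files_sketch(folder, file_names):
    """Delete the given files from `folder`, skipping any that do not exist."""
    for name in file_names:
        target = Path(folder) / name
        if target.exists():
            target.unlink()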
def norm():
    norm_method = request.form.get("norm_mthd")
    null_rmv = request.form.get("null_rmv")
    pre_process_id = request.form.get("id")

    if norm_method and null_rmv and pre_process_id:
        pre_process = UserData.get_preprocess_from_id(pre_process_id)
        if pre_process is None:
            return redirect('/pre')

        user_id = pre_process['user_id']
        UserData.update_preprocess(user_id, pre_process['file_name'], 'scaling', norm_method)
        UserData.update_preprocess(user_id, pre_process['file_name'], 'imputation', null_rmv)

        if pre_process['merge_df_path'] == '':
            # No merge step was run: build the frame from the raw upload
            merge_df_path = Path(pre_process['file_path'])
            df = PreProcess.getDF(merge_df_path)
            df = df.drop(['class'], axis=1)
            df = df.T
            df = df.reset_index()
        else:
            merge_df_path = Path(pre_process['merge_df_path'])
            df = PreProcess.getDF(merge_df_path)

        df = PreProcess.step3(df, norm_method, null_rmv)

        # symbol_df
        avg_symbol_name = "avg_symbol_" + pre_process['file_name']
        avg_symbol_df_path = USER_PATH / str(g.user["id"]) / "tmp" / avg_symbol_name
        avg_symbol_df_path_str = avg_symbol_df_path.as_posix()
        PreProcess.saveDF(df, avg_symbol_df_path_str)
        UserData.update_preprocess(user_id, pre_process['file_name'],
                                   'avg_symbol_df_path', avg_symbol_df_path_str)

        data = session[pre_process['file_name']]
        data = PreProcess.add_details_json(data, df, "r1")
        session[pre_process['file_name']] = data

        if len(df.columns) > 100:
            df_view = df.iloc[:, 0:100].head(15)
        else:
            df_view = df.head(15)

        return render_template("preprocess/step-4.html",
                               tablesstep4=[df_view.to_html(classes='data')],
                               details=data,
                               pre_process_id=pre_process_id,
                               file_name=avg_symbol_name)

    return redirect('/pre')
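
# Hedged sketch of what PreProcess.step3() is expected to do given the form
# fields above: impute missing values, then scale each numeric column. The
# dispatch strings ("drop", "min_max") are assumptions for illustration; the
# real values come from the step-3 form, not from this sketch.
import pandas as pd

def step3_sketch(df: pd.DataFrame, norm_method: str, null_rmv: str) -> pd.DataFrame:
    numeric = df.select_dtypes("number")
    # Imputation: drop rows with nulls, or fill nulls with the column mean.
    if null_rmv == "drop":
        df = df.dropna()
    else:
        df[numeric.columns] = numeric.fillna(numeric.mean())
    numeric = df.select_dtypes("number")
    # Scaling: min-max to [0, 1], or z-score standardisation.
    if norm_method == "min_max":
        df[numeric.columns] = (numeric - numeric.min()) / (numeric.max() - numeric.min())
    else:
        df[numeric.columns] = (numeric - numeric.mean()) / numeric.std()
    return df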
def get_feature_count_pval():
    pvalue = request.args.get("pvalue")
    fold_change = request.args.get("foldChange")
    pre_process_id = request.args.get("id")
    pre_process = UserData.get_preprocess_from_id(pre_process_id)

    path = USER_PATH / str(g.user["id"]) / 'tmp' / ('_p_fold_' + pre_process['file_name'])
    p_fold_df = PreProcess.getDF(path)

    count = PreProcess.get_filtered_df_count_pvalue(p_fold_df, float(pvalue),
                                                    float(fold_change))
    return str(count)
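
# Hedged sketch of the counting helper called above, assuming the p-fold frame
# carries the 'pValues' and 'fold' columns used elsewhere in this module, and
# that a feature passes when its p-value is below the cutoff AND its absolute
# fold change is above the cutoff (the comparison directions are assumptions):
def get_filtered_df_count_pvalue_sketch(p_fold_df, pvalue, fold_change):
    mask = (p_fold_df['pValues'] < pvalue) & (p_fold_df['fold'].abs() > fold_change)
    return int(mask.sum())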
def scaling_imputation():
    pre_process_id = request.args.get("id")
    pre_process = UserData.get_preprocess_from_id(pre_process_id)
    if pre_process is None:
        return redirect('/pre')

    data = session.get(pre_process['file_name'])
    if data is not None:
        return render_template("preprocess/step-3.html", details=data,
                               pre_process_id=pre_process_id)

    return redirect('/pre')
def feature_reduction():
    pre_process_id = request.args.get("id")
    pre_process = UserData.get_preprocess_from_id(pre_process_id)
    if pre_process is None:
        return redirect('/pre')

    if pre_process['avg_symbol_df_path']:
        avg_symbol_df_path = Path(pre_process['avg_symbol_df_path'])
        file_path = Path(pre_process['file_path'])
        p_fold_df = PreProcess.get_pvalue_fold_df(avg_symbol_df_path, file_path)
    else:
        # From step1: the scaling/imputation step was skipped
        file_path = Path(pre_process['file_path'])
        p_fold_df = PreProcess.get_pvalue_fold_df(file_path)

    p_fold_df_path = USER_PATH / str(g.user["id"]) / 'tmp' / ('_p_fold_' + pre_process['file_name'])
    PreProcess.saveDF(p_fold_df, p_fold_df_path)

    # Note: pvalues_max is computed but never used below; the p-value grid
    # is fixed at 0.001-0.01 regardless of the data.
    pvalues_max = p_fold_df['pValues'].max() * 0.1
    fold_max = p_fold_df['fold'].max() * 0.2

    pvalues = np.linspace(0.001, 0.01, 19)
    pvalues = np.around(pvalues, decimals=4)
    folds = np.linspace(0.001, fold_max, 40)
    folds = np.around(folds, decimals=4)
    data_array = [pvalues, folds]

    volcano_hash = get_volcano_fig(p_fold_df['fold'], p_fold_df['pValues'])
    UserData.update_preprocess(pre_process['user_id'], pre_process['file_name'],
                               'volcano_hash', volcano_hash)

    return render_template("preprocess/step-5.html", data_array=data_array,
                           volcano_hash=volcano_hash,
                           pre_process_id=pre_process_id)
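
# Hedged sketch of get_volcano_fig(): render a volcano plot (fold change vs.
# -log10 p-value) and return it as a base64 string a template can embed in an
# <img> tag. The real helper's output format is unknown; this only
# illustrates the pattern.
import base64
import io

import matplotlib
matplotlib.use("Agg")  # headless backend for a web server
import matplotlib.pyplot as plt
import numpy as np

def get_volcano_fig_sketch(fold, p_values):
    fig, ax = plt.subplots()
    ax.scatter(fold, -np.log10(p_values), s=4)
    ax.set_xlabel("fold change")
    ax.set_ylabel("-log10(p-value)")
    buf = io.BytesIO()
    fig.savefig(buf, format="png")
    plt.close(fig)
    return base64.b64encode(buf.getvalue()).decode("ascii")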
def get_reduce_features_from_pvalues():
    fold = request.form["fold-range"]
    pvalue = request.form["p-value"]
    pre_process_id = request.form["id"]
    pre_process = UserData.get_preprocess_from_id(pre_process_id)

    p_fold_df_path = USER_PATH / str(g.user["id"]) / 'tmp' / ('_p_fold_' + pre_process['file_name'])
    p_fold_df = PreProcess.getDF(p_fold_df_path)

    if pre_process['avg_symbol_df_path']:
        df = PreProcess.get_filtered_df_pvalue(p_fold_df, pre_process['avg_symbol_df_path'],
                                               float(pvalue), float(fold))
    else:
        # From step1 skip
        df = PreProcess.get_filtered_df_pvalue(p_fold_df, pre_process['file_path'],
                                               float(pvalue), float(fold), 0)

    fr_df_path = USER_PATH / str(g.user["id"]) / 'tmp' / ('fr_' + pre_process['file_name'])
    PreProcess.saveDF(df, fr_df_path)

    length = len(df.columns)
    if length <= 150:
        split_array = np.array([length])
    elif length < 350:
        split_array = np.arange(150, int(length / 10) * 10, 10)
    else:
        split_array = np.linspace(150, 350, 21)
    split_array = split_array.astype(int)

    # Get classification results
    df_y = PreProcess.getDF(Path(pre_process['file_path']))
    y = pd.to_numeric(df_y['class'])

    classification_result_df = FeatureReduction.get_classification_results(df, y)
    cls_id, cls_name = FeatureReduction.get_best_cls(classification_result_df)

    classification_result_df = classification_result_df.drop(['Training'], axis=1)
    classification_result_df = classification_result_df.sort_values(by=['Testing'],
                                                                    ascending=False)
    classification_result_df = classification_result_df.set_index(['Classifiers'])
    classification_result_df.index.name = None
    classification_result_df = classification_result_df.rename(
        columns={"Testing": "Testing Accuracy /%"})

    fs_fig_hash = get_feature_selection_fig(df, df_y, length)

    UserData.update_preprocess(pre_process['user_id'], pre_process['file_name'],
                               'reduce_df_path', fr_df_path.as_posix())
    UserData.update_preprocess(pre_process['user_id'], pre_process['file_name'],
                               'classifiers', cls_id)

    return render_template("preprocess/step-6.html", split_array=split_array,
                           fs_fig_hash=fs_fig_hash,
                           tables=[classification_result_df.to_html(classes='data')],
                           cls_names=cls_name,
                           pre_process_id=pre_process_id)
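
# Hedged sketch of the classifier comparison consumed above: fit a few
# scikit-learn models on a train/test split and report accuracies in the
# 'Classifiers' / 'Training' / 'Testing' columns this view expects. The real
# model list, split ratio, and scoring are assumptions.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

def get_classification_results_sketch(df, y):
    x_train, x_test, y_train, y_test = train_test_split(df, y, test_size=0.3,
                                                        random_state=0)
    models = {"KNN": KNeighborsClassifier(),
              "SVM": SVC(),
              "Random Forest": RandomForestClassifier()}
    rows = []
    for name, model in models.items():
        model.fit(x_train, y_train)
        rows.append({"Classifiers": name,
                     "Training": model.score(x_train, y_train) * 100,
                     "Testing": model.score(x_test, y_test) * 100})
    return pd.DataFrame(rows)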
def view_merge_df():
    pre_process_id = request.args.get("id")
    user_id = g.user["id"]

    annotation_table = request.form.get("anno_tbl")
    col_sel_method = request.form.get("column_selection")
    file_name = request.form.get("available_files")

    if annotation_table and col_sel_method and file_name:
        file_path = USER_PATH / str(user_id) / file_name

        # Delete the query if the file was already pre-processed
        UserData.delete_preprocess_file(user_id, file_name)

        if annotation_table == 'other':
            file = request.files['chooseFile']
            if file and allowed_file(file.filename):
                annotation_table = secure_filename(file.filename)
                path_csv = ANNOTATION_TBL / "other" / (str(user_id) + "_" + annotation_table)

                # Check whether the same annotation file was uploaded before
                result = UserData.get_user_file_by_file_name(user_id, annotation_table)

                annotation_df = pd.read_csv(file, usecols=[0, 1], header=0)
                col = annotation_df.columns
                if "ID" in col and "Gene Symbol" in col and len(col) == 2:
                    annotation_df.to_csv(path_csv, index=False)
                else:
                    flash("Wrong Format: Gene Symbol and/or ID column not found "
                          "in annotation table.")
                    return redirect('/pre')
            else:
                return abort(403)

            df = PreProcess.mergeDF(file_path, path_csv)

            if result is None:
                # First upload of this annotation table: register it
                view_path = "/AnnotationTbls/other/" + str(user_id) + "_" + annotation_table
                UserData.add_file(annotation_table, annotation_table.split('.')[1],
                                  view_path, user_id, 1, 0)
        else:
            # Load one of the bundled annotation tables
            annotation_table_path = UPLOAD_FOLDER.as_posix() + annotation_table
            df = PreProcess.mergeDF(file_path, Path(annotation_table_path))

        if df is None:
            flash("Couldn't merge dataset with annotation table")
            return redirect('/pre')

        y = PreProcess.getDF(file_path)
        if 'class' not in y.columns:
            flash("Wrong Format: class column not found.")
            return redirect('/pre')
        y = y['class']

        data = PreProcess.get_df_details(df, y)
        session[file_name] = data

        df = df.dropna(axis=0, subset=['Gene Symbol'])
        df = PreProcess.probe2Symbol(df, int(col_sel_method))

        merge_name = "merge_" + file_name
        merge_path = USER_PATH / str(user_id) / "tmp" / merge_name
        merge_path_str = merge_path.as_posix()
        PreProcess.saveDF(df, merge_path_str)

        # Save the pre-processing state to the database
        UserData.add_preprocess(user_id, file_name, file_path.as_posix(), annotation_table,
                                col_sel_method, merge_path_str, 0)
        pre_process_id = UserData.get_user_preprocess(user_id, file_name)['id']

        # df = df.sort_values(df.columns[0], ascending=False)
        df = df.set_index([df.columns[0]])
        df.columns.name = df.index.name
        df.index.name = None

        if len(df.columns) > 100:
            df_view = df.iloc[:, 0:100].head(15)
        else:
            df_view = df.head(15)

        return render_template("preprocess/step-2.html",
                               tables=[df_view.to_html(classes='data')],
                               details=data, pre_process_id=pre_process_id,
                               file_name=merge_name)

    elif pre_process_id:
        pre_process = UserData.get_preprocess_from_id(pre_process_id)
        if pre_process and pre_process['merge_df_path']:
            merge_name = "merge_" + pre_process['file_name']
            merge_path = Path(pre_process['merge_df_path'])
            df = PreProcess.getDF(merge_path)
            data = session[pre_process['file_name']]

            df = df.set_index([df.columns[0]])
            df.columns.name = df.index.name
            df.index.name = None

            if len(df.columns) > 100:
                df_view = df.iloc[:, 0:100].head(15)
            else:
                df_view = df.head(15)

            return render_template("preprocess/step-2.html",
                                   tables=[df_view.to_html(classes='data')],
                                   details=data, pre_process_id=pre_process_id,
                                   file_name=merge_name)

    return redirect('/pre')
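
# allowed_file() gates the upload above but is not defined in this module.
# A hedged sketch of the usual Flask extension check; the accepted extension
# set is an assumption (the upload is read with pd.read_csv, so CSV at least):
ALLOWED_EXTENSIONS_SKETCH = {"csv"}  # assumed

def allowed_file_sketch(filename):
    return ("." in filename
            and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS_SKETCH)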