def index():
    """Render the modeling index page.

    Query arguments read from the request:
        id: optional id of a stored result; its ``filename`` is handed to
            the template as ``analysed_file``.
        s:  state flag forwarded as ``state``. Only read on GET requests;
            otherwise the sentinel ``-1`` is used.
        a:  accuracy value forwarded as ``accuracy``.

    Returns:
        The rendered ``modeling/index.html`` template.
    """
    analysed_file = None
    result_id = request.args.get("id")
    if result_id:
        result = UserData.get_result_from_id(result_id)
        # NOTE(review): presumably the lookup yields None for an unknown id
        # (other UserData lookups in this file are checked that way). Treat
        # that like "no id given" instead of failing on the subscript.
        if result is not None:
            analysed_file = result['filename']

    s = request.args.get('s') if request.method == "GET" else -1
    # Read unconditionally so the name is always bound when rendering.
    a = request.args.get('a')

    classifier_list = ["svmLinear", "svmGaussian", "randomForest"]
    all_result = [
        r['filename']
        for r in UserData.get_result_to_validation(g.user['id'])
    ]
    return render_template("modeling/index.html",
                           available_list='',
                           classifier_list=classifier_list,
                           state=s,
                           accuracy=a,
                           all_result=all_result,
                           analysed_file=analysed_file)
def predict():
    """Render the prediction form for the current user's stored model.

    Collects the regular files in the user's folder and the user's
    annotation files, then loads the user's model record. When that record
    has no accuracy, the request is redirected to the modeling index with
    ``?s=2``.

    Returns:
        A redirect response, or the rendered ``modeling/predict.html``.
    """
    uid = g.user['id']
    user_dir = USER_PATH / str(g.user["id"])
    uploaded = [
        entry for entry in os.listdir(user_dir)
        if os.path.isfile(user_dir / entry)
    ]
    annotations = [
        [row['file_name'], row['path']]
        for row in UserData.get_annotation_file(g.user["id"])
    ]

    model = UserData.get_model(uid)
    if model['accuracy'] is None:
        return redirect(url_for('modeling.index') + "?s=2")

    # Order matters: the template receives the details as a positional list.
    details = [
        model['features'].split(','),
        model['trained_file'],
        model['clasifier'],
        str(round(float(model['accuracy']), 2)),
    ]
    return render_template("modeling/predict.html",
                           available_list=uploaded,
                           details=details,
                           annotation_list=annotations)
def index_pdf():
    """Render the PDF index page with per-file download flags.

    For every regular file in the current user's folder three flags are
    collected, index-aligned with the file list:

    * ``can_download`` from the preprocess lookup,
    * ``can_download_fs`` and ``can_download_anlz`` from the result lookup.

    A lookup that returns ``None`` contributes ``0`` for its flag(s).

    Returns:
        The rendered ``pdf/index.html`` template.
    """
    user_id = g.user["id"]
    path = USER_PATH / str(user_id)
    list_names = [f for f in os.listdir(path) if os.path.isfile(path / f)]

    preprocess_can_download = []
    fs_can_download = []
    anlz_can_download = []
    for file_name in list_names:
        pre = UserData.get_user_can_download_preprocess(user_id, file_name)
        preprocess_can_download.append(
            0 if pre is None else pre['can_download'])

        # One result lookup feeds both flags; the original queried the same
        # row twice per file in two separate loops.
        res = UserData.get_can_download_result(user_id, file_name)
        if res is None:
            fs_can_download.append(0)
            anlz_can_download.append(0)
        else:
            fs_can_download.append(res['can_download_fs'])
            anlz_can_download.append(res['can_download_anlz'])

    return render_template("pdf/index.html",
                           available_list=list_names,
                           can_download_pre=preprocess_can_download,
                           can_download_fs=fs_can_download,
                           can_download_anlz=anlz_can_download)
def delete_user_account():
    """Delete a user account, its folder and its registered files.

    The target user is given by the ``id`` query argument.

    Returns:
        ``'1'`` after the deletion steps have run.

    Raises:
        400 (via ``abort``) when ``id`` is missing or not an integer;
        401 (via ``abort``) when the caller is neither an admin nor the
        account owner.
    """
    try:
        user_id = int(request.args.get('id'))
    except (TypeError, ValueError):
        # Without this guard a missing id would target USER_PATH / "None".
        abort(400)

    # Same rule as the other delete_user_account variant in this file:
    # only an admin or the account owner may delete the account.
    if not (g.user['is_admin'] or g.user['id'] == user_id):
        abort(401)

    UserData.remove_user(user_id)
    delete_folder(USER_PATH / str(user_id))
    delete_user_file(user_id)
    return '1'
def update_user_disk_space():
    """Update the stored disk-space value of a user (admin only).

    Reads ``id`` and ``disk_space`` from the query string and forwards them
    to ``UserData.update_user_disk_space``.

    Returns:
        ``"1"`` once the update call has been made.

    Raises:
        401 (via ``abort``) when ``is_not_admin`` rejects the current user.
    """
    # Check whether admin
    if is_not_admin(g.user):
        # abort() needs the integer status code; the string '401' is not a
        # registered code and would surface as a server error instead.
        abort(401)

    user_id = request.args.get('id')
    disk_space = request.args.get('disk_space')
    UserData.update_user_disk_space(user_id, disk_space)
    return "1"
def infrequent_files_delete():
    """Delete all files of the users listed in the ``ids`` query argument.

    ``ids`` is a comma-separated list. Every entry is passed to
    ``delete_user_all_files`` and the raw string is then forwarded to
    ``UserData.infrequent_users``.

    Returns:
        ``"1"`` after all deletions have been issued.

    Raises:
        400 (via ``abort``) when ``ids`` is missing or empty.
    """
    ids = request.args.get('ids')
    if not ids:
        # A missing argument used to fail with AttributeError on .split(),
        # and an empty one would have issued a delete for the id ''.
        abort(400)

    for user_id in ids.split(','):
        delete_user_all_files(user_id)
    UserData.infrequent_users(ids)
    return "1"
def predict_results():
    """Run the stored model on a selected file and render the predictions.

    Form fields read: ``is_default_model``, ``available_files``,
    ``is_norm``, ``is_map`` and, when mapping is requested, ``anno_tbl``.

    The selected data frame is optionally merged with an annotation table
    (``is_map == "true"``) or normalised (``is_norm == "true"``). It is then
    checked for the model's feature columns. Predictions are relabelled
    ``'0'`` -> ``'Negative'`` and ``'1'`` -> ``'Positive'``, pickled to the
    user's ``tmp/results.pkl`` and rendered.

    Returns:
        ``error.html`` when the column check reports a problem, otherwise
        ``modeling/predict-results.html``.
    """
    uid = g.user['id']
    is_default_model = request.form.get("is_default_model")
    # Model id 0 is looked up when the default-model flag is set.
    model = UserData.get_model(0 if int(is_default_model) else uid)
    wanted_cols = model['features'].split(',')

    selected_file = request.form["available_files"]
    data_path = USER_PATH / str(uid) / selected_file
    df = PreProcess.getDF(data_path)

    is_norm = request.form.get("is_norm")
    is_map = request.form.get("is_map")
    if is_map == "true":
        annotation_file = request.form["anno_tbl"]
        annotation_path = Path(UPLOAD_FOLDER.as_posix() + annotation_file)
        df = PreProcess.mergeDF(data_path, annotation_path)
        df = df.dropna(axis=0, subset=['Gene Symbol'])
        df = PreProcess.step3(PreProcess.probe2Symbol(df), 'sklearn', 'drop')
        # Transposed after indexing by 'Gene Symbol', so the symbols become
        # the column labels checked against the model's features below.
        df = df.set_index(['Gene Symbol']).T
    elif is_norm == "true":
        df = get_norm_df(df)

    model_name = model['model_path_name']
    problem = ValidateUser.has_col(df.columns, wanted_cols)
    if problem is not None:
        return render_template("error.html", errors=problem)

    predicted = get_predicted_result_df(uid, model_name, df[wanted_cols],
                                        is_default_model)
    predicted = predicted.astype(str)
    predicted[predicted == '0'] = 'Negative'
    predicted[predicted == '1'] = 'Positive'

    out_result = pd.DataFrame({'ID': df.index,
                               'Predicted Result': predicted})
    out_result.to_pickle(USER_PATH / str(uid) / "tmp" / "results.pkl")
    label_counts = out_result['Predicted Result'].value_counts()

    # The ``classes`` value is kept verbatim; it is written so that the
    # generated markup also carries an ``id`` attribute.
    html_table = out_result.to_html(classes='display" id = "table_id')
    return render_template("modeling/predict-results.html",
                           tables=[html_table],
                           data=label_counts)
def save_reduced_df():
    """Save the feature-reduced data frame and clean up temporary files.

    Form fields read: ``features_count`` and ``id`` (preprocess record id).

    The reduced frame and the ``class`` column of the original file are
    passed to ``FeatureReduction.getSelectedFeatures``. The result is saved
    in the user's folder under ``GeNet_<file_name>``. The intermediate files
    in ``tmp`` are then removed and the session entry for the file is reset
    to ``None``.

    Returns:
        A redirect to ``/fs/?id=<preprocess id>``.
    """
    features_count = request.form['features_count']
    pre_process_id = request.form['id']
    record = UserData.get_preprocess_from_id(pre_process_id)

    reduced = PreProcess.getDF(Path(record['reduce_df_path']))
    source = PreProcess.getDF(Path(record['file_path']))
    labels = pd.to_numeric(source['class'])
    selected = FeatureReduction.getSelectedFeatures(reduced,
                                                    int(features_count),
                                                    labels)

    file_name = record['file_name']
    user_dir = USER_PATH / str(g.user["id"])
    PreProcess.saveDF(selected, user_dir / ('GeNet_' + file_name))

    # remove old files
    stale = [
        prefix + file_name
        for prefix in ("merge_", "avg_symbol_", "_p_fold_", "fr_")
    ]
    remove_files(user_dir / "tmp", stale)

    session[file_name] = None
    return redirect('/fs/?id=' + str(pre_process_id))
def delete_user_file(user_id):
    """Remove from disk every file registered for ``user_id``.

    Args:
        user_id: id passed to ``UserData.get_user_file``; each returned
            record's ``path`` entry is deleted.

    Files that are already gone are skipped silently. Other OS errors
    propagate to the caller.
    """
    for record in UserData.get_user_file(user_id):
        try:
            os.remove(Path(record['path']))
        except FileNotFoundError:
            # Already absent. Attempting the removal directly avoids the
            # check-then-remove race of an exists() test.
            pass
def delete_infrequent_users():
    """Delete the accounts whose usernames are listed in the JSON body.

    The request body is expected to be a JSON list of usernames (e-mail
    addresses). Every matching row of the ``user`` table is removed through
    ``UserData.remove_user``, its folder and registered files are deleted,
    and ``UserData.send_delete_msg`` is finally called with the list.

    Returns:
        ``"1"`` after processing.
    """
    # A missing or empty body is treated as "nothing to delete" rather than
    # failing while iterating None.
    emails = request.get_json() or []

    rows = []
    if emails:
        # Bind the usernames as parameters instead of splicing
        # request-supplied text into the SQL string (SQL injection).
        # NOTE(review): assumes get_db() returns a sqlite3-style connection
        # using '?' placeholders - confirm against get_db().
        placeholders = ','.join(['?'] * len(emails))
        query = ("SELECT * FROM user WHERE username IN (" + placeholders +
                 ")")
        rows = get_db().execute(query, list(emails)).fetchall()

    for row in rows:
        UserData.remove_user(row['id'])
        delete_folder(USER_PATH / str(row['id']))
        delete_user_file(row['id'])

    UserData.send_delete_msg(emails)
    return "1"
def delete_file():
    """Delete one uploaded file of a user plus its database records.

    Query arguments read: ``id`` (user id) and ``name`` (file name).

    The preprocess and result records for the file are deleted first, then
    the file itself is removed from ``USER_PATH/<id>/<name>`` if present.

    Returns:
        ``'1'`` when the file existed and was removed, ``'0'`` otherwise
        (including when either argument is missing).
    """
    user_id = request.args.get('id')
    name = request.args.get('name')
    if not user_id or not name:
        # Previously the database rows were deleted with None values and
        # the path construction then raised TypeError.
        return '0'

    UserData.delete_preprocess_file(user_id, name)
    UserData.delete_result(user_id, name)
    # NOTE: model records are not touched here (the delete_model call was
    # disabled in the original code).

    f_path = USER_PATH / user_id / name
    if os.path.exists(f_path):
        os.remove(f_path)
        return '1'
    return '0'
def analysis_pdf():
    """Render the analysis PDF page for one of the current user's results.

    The result record is looked up by the ``filename`` query argument and a
    subset of its fields is passed to ``pdf/analysis_pdf.html``.

    Returns:
        The rendered ``pdf/analysis_pdf.html`` template.
    """
    file_name = request.args.get("filename")
    anlz_details = UserData.get_result(g.user["id"], file_name)

    # Fields copied under their own name, in template order.
    copied_fields = (
        "filename",
        "an_overlap_hash",
        "an_cls_hash",
        "an_crr_hash",
        "col_selected_method",
        "selected_method",
        "an_crr_1_hash",
        "an_crr_2_hash",
        "selected_roc_pic_hash",
    )
    anlz_details_set = {
        field: anlz_details[field] for field in copied_fields
    }
    # NOTE(review): "all_roc_pic_hash" is filled from 'an_crr_2_hash'. This
    # looks like a copy/paste slip, but the intended source column cannot be
    # confirmed from here, so the behaviour is kept as-is.
    anlz_details_set["all_roc_pic_hash"] = anlz_details['an_crr_2_hash']
    anlz_details_set["col_overlapped"] = anlz_details['col_overlapped']

    accuracy = anlz_details['corr_classification_accuracy']
    return render_template("pdf/analysis_pdf.html",
                           anlz_details_set=anlz_details_set,
                           corr_classification_accuracy=accuracy,
                           result_data_1=anlz_details['result_data_1'],
                           result_data_2=anlz_details['result_data_2'])
def get_results_for_modeling():
    """Return the current user's result record as a ``|``-joined string.

    The record is looked up by the ``filename`` query argument; every value
    yielded by iterating it is converted with ``str``.
    """
    filename = request.args.get('filename')
    record = UserData.get_result(g.user['id'], filename)
    return '|'.join(map(str, record))
def preprocessing_pdf():
    """Render the preprocessing PDF page for one of the user's files.

    The preprocess record is looked up by the ``filename`` query argument.
    When no column-selection method is stored (empty string), the
    annotation table and probe method are shown as ``'-'``. Otherwise the
    annotation table path has its ``/AnnotationTbls/`` prefix removed and
    the stored 1-based method number is turned into its label.

    Returns:
        The rendered ``pdf/preprocess_pdf.html`` template.
    """
    file_name = request.args.get("filename")
    preprocess = UserData.get_user_preprocess(g.user["id"], file_name)
    method_labels = ['Average', 'Max', 'Min', 'Interquartile range']

    if preprocess['col_sel_method'] == '':
        annotation_table = '-'
        prob_mthd = '-'
    else:
        annotation_table = preprocess['annotation_table'].replace(
            '/AnnotationTbls/', '')
        prob_mthd = method_labels[int(preprocess['col_sel_method']) - 1]

    # Plot-related fields; they are also part of the main data dict.
    preprocess_data_plot = {
        "volcano_hash": preprocess['volcano_hash'],
        "fold": preprocess['fold'],
        "pvalue": preprocess['pvalue'],
        "univariate_length": preprocess['length'],
        "fr_univariate_hash": preprocess['fr_univariate_hash'],
    }

    preprocess_data = {
        "file_name": preprocess['file_name'],
        "annotation_table": annotation_table,
        "prob_mthd": prob_mthd,
        "normalize": preprocess['scaling'],
        "imputation": preprocess['imputation'],
    }
    preprocess_data.update(preprocess_data_plot)

    return render_template(
        "pdf/preprocess_pdf.html",
        data=preprocess_data,
        data_after_norm=preprocess['after_norm_set'],
        data_plot=preprocess_data_plot,
        clf_results=preprocess['classification_result_set'])
def norm():
    """Apply scaling and imputation to a preprocess data set and show step 4.

    Form fields read: ``norm_mthd``, ``null_rmv`` and ``id`` (preprocess
    record id).

    The chosen methods are stored on the preprocess record. The data frame
    (the merged frame if one exists, otherwise the original file without
    its ``class`` column, transposed) is run through ``PreProcess.step3``
    and saved as ``tmp/avg_symbol_<file_name>``. The session details for
    the file are then extended and a preview is rendered.

    Returns:
        The rendered ``preprocess/step-4.html`` template, or a redirect to
        ``/pre`` when a form field is missing, the record is unknown, or
        the session holds no details for the file.
    """
    norm_method = request.form.get("norm_mthd")
    null_rmv = request.form.get("null_rmv")
    pre_process_id = request.form.get("id")
    if not (norm_method and null_rmv and pre_process_id):
        return redirect('/pre')

    pre_process = UserData.get_preprocess_from_id(pre_process_id)
    if pre_process is None:
        return redirect('/pre')

    file_name = pre_process['file_name']
    # Check the session before touching the database or disk. The original
    # indexed session[...] only after those updates, so an expired session
    # raised KeyError half-way through. The rule matches scaling_imputation.
    data = session.get(file_name)
    if data is None:
        return redirect('/pre')

    user_id = pre_process['user_id']
    UserData.update_preprocess(user_id, file_name, 'scaling', norm_method)
    UserData.update_preprocess(user_id, file_name, 'imputation', null_rmv)

    if pre_process['merge_df_path'] == '':
        # No merged frame stored: start from the original file.
        df = PreProcess.getDF(Path(pre_process['file_path']))
        df = df.drop(['class'], axis=1).T.reset_index()
    else:
        df = PreProcess.getDF(Path(pre_process['merge_df_path']))
    df = PreProcess.step3(df, norm_method, null_rmv)

    # symbol_df
    avg_symbol_name = "avg_symbol_" + file_name
    avg_symbol_df_path = USER_PATH / str(g.user["id"]) / "tmp" / avg_symbol_name
    avg_symbol_df_path_str = avg_symbol_df_path.as_posix()
    PreProcess.saveDF(df, avg_symbol_df_path_str)
    UserData.update_preprocess(user_id, file_name, 'avg_symbol_df_path',
                               avg_symbol_df_path_str)

    data = PreProcess.add_details_json(data, df, "r1")
    session[file_name] = data

    # Preview: at most the first 100 columns and 15 rows.
    df_view = df.iloc[:, 0:100].head(15)
    return render_template("preprocess/step-4.html",
                           tablesstep4=[df_view.to_html(classes='data')],
                           details=data,
                           pre_process_id=pre_process_id,
                           file_name=avg_symbol_name)
def feature_reduction():
    """Compute the p-value/fold table for a preprocess record and show step 5.

    The record is looked up by the ``id`` query argument. The table from
    ``PreProcess.get_pvalue_fold_df`` is saved as ``tmp/_p_fold_<file_name>``
    in the current user's folder. A volcano figure hash is generated and
    stored on the record, and the selectable p-value and fold thresholds
    are passed to the template.

    Returns:
        The rendered ``preprocess/step-5.html`` template, or a redirect to
        ``/pre`` when the record does not exist.
    """
    pre_process_id = request.args.get("id")
    pre_process = UserData.get_preprocess_from_id(pre_process_id)
    if pre_process is None:
        return redirect('/pre')

    file_path = Path(pre_process['file_path'])
    if pre_process['avg_symbol_df_path']:
        avg_symbol_df_path = Path(pre_process['avg_symbol_df_path'])
        p_fold_df = PreProcess.get_pvalue_fold_df(avg_symbol_df_path,
                                                  file_path)
    else:
        # From step1
        p_fold_df = PreProcess.get_pvalue_fold_df(file_path)

    p_fold_df_path = USER_PATH / str(
        g.user["id"]) / 'tmp' / ('_p_fold_' + pre_process['file_name'])
    PreProcess.saveDF(p_fold_df, p_fold_df_path)

    # The unused ``pvalues_max`` local was dropped; the p-value grid below
    # has always been a fixed range.
    fold_max = p_fold_df['fold'].max() * 0.2
    pvalues = np.around(np.linspace(0.001, 0.01, 19), decimals=4)
    folds = np.around(np.linspace(0.001, fold_max, 40), decimals=4)
    data_array = [pvalues, folds]

    volcano_hash = get_volcano_fig(p_fold_df['fold'], p_fold_df['pValues'])
    UserData.update_preprocess(pre_process['user_id'],
                               pre_process['file_name'], 'volcano_hash',
                               volcano_hash)

    return render_template("preprocess/step-5.html",
                           data_array=data_array,
                           volcano_hash=volcano_hash,
                           pre_process_id=pre_process_id)
def delete_user_account():
    """Delete a user account, its folder and its registered files.

    The target user is given by the ``id`` query argument. Only an admin or
    the account owner may delete. When an admin deletes another user's
    account, that user's username is passed to ``UserData.send_delete_msg``
    first.

    Returns:
        ``'1'`` after the deletion steps have run.

    Raises:
        400 (via ``abort``) when ``id`` is missing or not an integer;
        401 (via ``abort``) when the caller is not allowed to delete.
    """
    try:
        user_id = int(request.args.get('id'))
    except (TypeError, ValueError):
        # Previously an unhandled TypeError/ValueError (server error).
        abort(400)

    # Check admin or same user
    is_admin = g.user['is_admin']
    is_owner = g.user['id'] == user_id
    if not (is_admin or is_owner):
        # abort() needs the integer status code; the string '401' is not a
        # registered code and would surface as a server error instead.
        abort(401)

    if is_admin and not is_owner:
        email = UserData.get_user(user_id)['username']
        UserData.send_delete_msg([email])

    UserData.remove_user(user_id)
    delete_folder(USER_PATH / str(user_id))
    delete_user_file(user_id)
    return '1'
def create_model_pkl(user_id, filename, classifier, result):
    """Train a classifier on a user's file and persist it as a pickle.

    Args:
        user_id: id of the user owning ``filename`` and the model.
        filename: name of the data file inside the user's folder.
        classifier: one of ``"svmLinear"``, ``"svmGaussian"`` or
            ``"randomForest"``.
        result: mapping with comma-separated ``col_overlapped`` and
            ``col_selected_method`` entries naming the feature columns.

    Returns:
        The mean 3-fold cross-validation score as a percentage rounded to
        two decimals, or ``None`` when the column check fails or the
        classifier name is unknown.

    Side effects:
        Writes ``tmp/_model.pkl`` in the user's folder and records the model
        through ``UserData.update_model``.
    """
    df = PreProcess.getDF(USER_PATH / str(user_id) / filename)

    col_overlapped = result['col_overlapped'].split(',')
    col_selected_method = result['col_selected_method'].split(',')
    # dict.fromkeys de-duplicates while keeping first-seen order.
    col_mo = list(dict.fromkeys(col_overlapped + col_selected_method))

    if ValidateUser.has_col(df.columns, col_mo) is not None:
        return None

    y = df["class"]
    x = df[col_mo]

    if classifier == "svmLinear":
        clf = svm.SVC(kernel='linear')
    elif classifier == "svmGaussian":
        clf = SVC(kernel="rbf", gamma="auto", C=1)
    elif classifier == "randomForest":
        clf = RandomForestClassifier(n_estimators=100,
                                     max_depth=2,
                                     random_state=42)
    else:
        return None

    clf.fit(x, y)
    scores = cross_val_score(clf, x, y, cv=3)
    score = round(scores.mean() * 100, 2)

    file_to_write = USER_PATH / str(user_id) / "tmp" / "_model.pkl"
    # The context manager closes the handle even if pickling fails; the
    # original left the file object open.
    with open(file_to_write, 'wb') as model_file:
        pickle.dump(clf, model_file)

    UserData.update_model(user_id, filename, classifier, ','.join(col_mo),
                          "_model.pkl", str(score))
    return score
def get_feature_count_pval():
    """Return, as a string, the feature count for the given thresholds.

    Query arguments read: ``pvalue``, ``foldChange`` and ``id`` (preprocess
    record id). The saved ``tmp/_p_fold_<file_name>`` table of the current
    user is loaded and passed, with both thresholds converted to ``float``,
    to ``PreProcess.get_filtered_df_count_pvalue``.
    """
    args = request.args
    pvalue = args.get("pvalue")
    fold_change = args.get("foldChange")
    record = UserData.get_preprocess_from_id(args.get("id"))

    tmp_dir = USER_PATH / str(g.user["id"]) / 'tmp'
    p_fold_df = PreProcess.getDF(tmp_dir / ('_p_fold_' + record['file_name']))

    count = PreProcess.get_filtered_df_count_pvalue(p_fold_df,
                                                    float(pvalue),
                                                    float(fold_change))
    return str(count)
def skip_df_mapping():
    """Register a file for preprocessing without annotation mapping.

    The file is named by the ``selected_file`` query argument. Any existing
    preprocess record for it is replaced by a fresh one with empty
    annotation, column-selection and merge-path fields. The data frame
    details are stored in the session under the file name.

    Returns:
        A redirect to ``preprocess.scaling_imputation`` for the new record,
        or to ``./pre`` when no file was selected.
    """
    uid = g.user['id']
    file_name = request.args.get("selected_file")
    if not file_name:
        return redirect('./pre')

    source_path = USER_PATH / str(uid) / file_name

    UserData.delete_preprocess_file(uid, file_name)
    UserData.add_preprocess(uid, file_name, source_path.as_posix(), '', '',
                            '')
    record = UserData.get_user_preprocess(uid, file_name)
    pre_process_id = record['id']

    frame = PreProcess.getDF(source_path)
    session[file_name] = PreProcess.get_df_details(frame, None)

    target = url_for('preprocess.scaling_imputation') + "?id=" + str(
        pre_process_id)
    return redirect(target)
def create_model():
    """Train a model from the submitted form and redirect with the outcome.

    Form fields read: ``available_files``, ``classifier`` and
    ``available_result``.

    Returns:
        A redirect to ``/mod/?s=0`` when ``create_model_pkl`` returns
        ``None``, otherwise to ``/mod/?s=1&a=<score>``.
    """
    uid = g.user['id']
    data_file = request.form["available_files"]
    classifier = request.form["classifier"]
    result_file = request.form["available_result"]

    result = UserData.get_result(uid, result_file)
    score = create_model_pkl(uid, data_file, classifier, result)

    if score is not None:
        return redirect('/mod/?s=1&a=' + str(score))
    return redirect('/mod/?s=0')
def scaling_imputation():
    """Render step 3 (scaling / imputation) for a preprocess record.

    The record is looked up by the ``id`` query argument and its details
    are taken from the session under the record's file name.

    Returns:
        The rendered ``preprocess/step-3.html`` template, or a redirect to
        ``/pre`` when the record or the session details are missing.
    """
    pre_process_id = request.args.get("id")
    record = UserData.get_preprocess_from_id(pre_process_id)
    if record is None:
        return redirect('/pre')

    details = session.get(record['file_name'])
    if details is None:
        return redirect('/pre')

    return render_template("preprocess/step-3.html",
                           details=details,
                           pre_process_id=pre_process_id)
def index():
    """Render step 1 of preprocessing with the user's files and annotations.

    Lists the regular files in the current user's folder and the user's
    annotation files. An error message is flashed when the folder contains
    no files.

    Returns:
        The rendered ``preprocess/step-1.html`` template.
    """
    user_dir = USER_PATH / str(g.user["id"])
    uploaded = [
        name for name in os.listdir(user_dir)
        if os.path.isfile(user_dir / name)
    ]
    annotations = [
        [row['file_name'], row['path']]
        for row in UserData.get_annotation_file(g.user["id"])
    ]

    if not uploaded:
        flash("Error: You don't have uploaded file.")

    return render_template("preprocess/step-1.html",
                           available_list=uploaded,
                           annotation_list=annotations)
def feature_selection_pdf():
    """Render the feature-selection PDF page for one of the user's results.

    The result record is looked up by the ``filename`` query argument.

    Returns:
        The rendered ``pdf/feature_pdf.html`` template.
    """
    file_name = request.args.get("filename")
    record = UserData.get_result(g.user["id"], file_name)

    summary_fields = ("filename", "fs_methods", "col_method1",
                      "col_method2", "col_method3")
    summary = {field: record[field] for field in summary_fields}

    return render_template("pdf/feature_pdf.html",
                           feature_details=summary,
                           venn_data=record['venn_data_set'],
                           fs_hash=record['fs_hash'])
def get_files_for_modeling():
    """Return, as JSON, the user's files that contain the result's columns.

    The result record is looked up by the ``filename`` query argument. Its
    ``col_overlapped`` and ``col_selected_method`` column names are combined
    and every regular file in the user's folder is loaded and tested with
    ``ValidateUser.is_subset``.

    Returns:
        A JSON array (string) of the matching file names.
    """
    filename = request.args.get('filename')
    uid = g.user['id']
    result = UserData.get_result_for_modeling(uid, filename)

    overlapped = result['col_overlapped'].split(',')
    selected = result['col_selected_method'].split(',')
    required = list(set(overlapped + selected))

    user_dir = USER_PATH / str(uid)
    matching = []
    for entry in os.listdir(user_dir):
        candidate = user_dir / entry
        if not os.path.isfile(candidate):
            continue
        columns = PreProcess.getDF(candidate).columns.to_list()
        if ValidateUser.is_subset(columns, required):
            matching.append(entry)

    return json.dumps(matching)
def send_warning():
    """Forward the ``id`` query argument to ``UserData.send_warning``.

    Returns:
        ``"1"`` once the call has been made.
    """
    target_id = request.args.get('id')
    UserData.send_warning(target_id)
    return "1"
def send_warnings():
    """Forward the request's JSON body to ``UserData.send_warnings``.

    Returns:
        ``"1"`` once the call has been made.
    """
    UserData.send_warnings(request.get_json())
    return "1"
def view_merge_df():
    """Merge a data file with an annotation table and render step 2.

    Form fields read: ``anno_tbl`` (annotation table name or ``'other'``),
    ``column_selection`` and ``available_files``. With ``'other'`` the
    table is taken from the uploaded ``chooseFile`` part instead.

    Flow:
        1. Drop any earlier preprocess record for the file.
        2. Obtain the merged frame. An uploaded table must have exactly the
           columns ``ID`` and ``Gene Symbol``.
        3. Store the frame details in the session, collapse probes to gene
           symbols, save the result as ``tmp/merge_<file_name>`` and create
           a new preprocess record.

    Returns:
        The rendered ``preprocess/step-2.html`` template, or a redirect to
        ``/pre`` when a form field is missing or validation fails.

    Raises:
        403 (via ``abort``) when the uploaded file is missing or rejected by
        ``allowed_file``.
    """
    user_id = g.user["id"]
    annotation_table = request.form.get("anno_tbl")
    col_sel_method = request.form.get("column_selection")
    file_name = request.form.get("available_files")
    if not (annotation_table and col_sel_method and file_name):
        return redirect('/pre')

    file_path = USER_PATH / str(user_id) / file_name

    # Delete query if file already pre-processed
    UserData.delete_preprocess_file(user_id, file_name)

    if annotation_table == 'other':
        upload = request.files['chooseFile']
        if not (upload and allowed_file(upload.filename)):
            return abort(403)

        annotation_table = secure_filename(upload.filename)
        stored_name = str(user_id) + "_" + annotation_table
        path_csv = ANNOTATION_TBL / "other" / stored_name

        # Looked up before writing so we know below whether the table still
        # has to be registered for this user.
        result = UserData.get_user_file_by_file_name(user_id,
                                                     annotation_table)

        annotation_df = pd.read_csv(upload, usecols=[0, 1], header=0)
        col = annotation_df.columns
        if not ("ID" in col and "Gene Symbol" in col and len(col) == 2):
            flash(
                "Wrong Format: Gene Symbol and/or ID column not found in annotation table."
            )
            return redirect('/pre')
        annotation_df.to_csv(path_csv, index=False)

        df = PreProcess.mergeDF(file_path, path_csv)

        if result is None:
            view_path = "/AnnotationTbls/other/" + stored_name
            # Take the text after the LAST dot: split('.')[1] returned the
            # wrong part for names such as "table.v2.csv".
            # NOTE(review): presumably this argument is the file type.
            extension = annotation_table.rsplit('.', 1)[-1]
            UserData.add_file(annotation_table, extension, view_path,
                              user_id, 1, 0)
    else:
        # load df
        annotation_table_path = UPLOAD_FOLDER.as_posix() + annotation_table
        df = PreProcess.mergeDF(file_path, Path(annotation_table_path))

    if df is None:
        flash("Couldn't merge dataset with annotation table")
        return redirect('/pre')

    y = PreProcess.getDF(file_path)
    if 'class' not in y.columns:
        flash("Wrong Format: class column not found.")
        return redirect('/pre')
    y = y['class']

    data = PreProcess.get_df_details(df, y)
    session[file_name] = data

    df = df.dropna(axis=0, subset=['Gene Symbol'])
    df = PreProcess.probe2Symbol(df, int(col_sel_method))

    merge_name = "merge_" + file_name
    merge_path_str = (USER_PATH / str(user_id) / "tmp" /
                      merge_name).as_posix()
    PreProcess.saveDF(df, merge_path_str)

    # save data to the Database
    UserData.add_preprocess(user_id, file_name, file_path.as_posix(),
                            annotation_table, col_sel_method, merge_path_str)
    pre_process_id = UserData.get_user_preprocess(user_id, file_name)['id']

    # Preview: at most the first 100 columns and 15 rows.
    df_view = df.iloc[:, 0:100].head(15)
    return render_template("preprocess/step-2.html",
                           tables=[df_view.to_html(classes='data')],
                           details=data,
                           pre_process_id=pre_process_id,
                           file_name=merge_name)
def get_reduce_features_from_pvalues():
    """Filter features by p-value and fold change, then render step 6.

    Form fields read: ``fold-range``, ``p-value`` and ``id`` (preprocess
    record id).

    The saved ``tmp/_p_fold_<file_name>`` table is used to filter the data
    frame, which is stored as ``tmp/fr_<file_name>``. Classification
    results are computed on the filtered frame, and the best classifier id
    and the reduced-frame path are written to the preprocess record.

    Returns:
        The rendered ``preprocess/step-6.html`` template.
    """
    fold = request.form["fold-range"]
    pvalue = request.form["p-value"]
    pre_process_id = request.form["id"]
    pre_process = UserData.get_preprocess_from_id(pre_process_id)

    file_name = pre_process['file_name']
    tmp_dir = USER_PATH / str(g.user["id"]) / 'tmp'
    p_fold_df = PreProcess.getDF(tmp_dir / ('_p_fold_' + file_name))

    if pre_process['avg_symbol_df_path']:
        df = PreProcess.get_filtered_df_pvalue(
            p_fold_df, pre_process['avg_symbol_df_path'], float(pvalue),
            float(fold))
    else:
        # From step1 skip
        df = PreProcess.get_filtered_df_pvalue(p_fold_df,
                                               pre_process['file_path'],
                                               float(pvalue), float(fold), 0)

    fr_df_path = tmp_dir / ('fr_' + file_name)
    PreProcess.saveDF(df, fr_df_path)

    # Candidate feature counts offered to the user.
    # NOTE(review): for 151-159 columns the arange below is empty
    # (start == stop == 150) - confirm whether that is intended.
    length = len(df.columns)
    if length <= 150:
        split_array = np.array([length])
    elif length < 350:
        split_array = np.arange(150, int(length / 10) * 10, 10)
    else:
        split_array = np.linspace(150, 350, 21)
    split_array = split_array.astype(int)

    # Get classification Results
    df_y = PreProcess.getDF(Path(pre_process['file_path']))
    y = pd.to_numeric(df_y['class'])
    raw_results = FeatureReduction.get_classification_results(df, y)
    cls_id, cls_name = FeatureReduction.get_best_cls(raw_results)

    # Presentation table: testing accuracy only, best first, classifier
    # names as an unnamed index.
    results_table = (raw_results.drop(['Training'], axis=1)
                     .sort_values(by=['Testing'], ascending=False)
                     .set_index(['Classifiers']))
    results_table.index.name = None
    results_table = results_table.rename(
        columns={"Testing": "Testing Accuracy /%"})

    fs_fig_hash = get_feature_selection_fig(df, df_y, length)

    UserData.update_preprocess(pre_process['user_id'], file_name,
                               'reduce_df_path', fr_df_path.as_posix())
    UserData.update_preprocess(pre_process['user_id'], file_name,
                               'classifiers', cls_id)

    return render_template("preprocess/step-6.html",
                           split_array=split_array,
                           fs_fig_hash=fs_fig_hash,
                           tables=[results_table.to_html(classes='data')],
                           cls_names=cls_name,
                           pre_process_id=pre_process_id)