Exemple #1
0
def save_reduced_df():
    """Persist the final feature-selected dataframe and clean up temp files.

    Reads the requested feature count and pre-process id from the POSTed
    form, selects that many features from the cached reduced dataframe,
    saves the result as 'GeNet_<file>' in the user's folder, removes the
    intermediate per-step files, clears the session entry and redirects
    to the feature-selection page.
    """
    n_features = request.form['features_count']
    pre_process_id = request.form['id']
    pre_process = UserData.get_preprocess_from_id(pre_process_id)

    reduced_df = PreProcess.getDF(Path(pre_process['reduce_df_path']))
    # Class labels come from the original uploaded file, coerced to numeric.
    labels = pd.to_numeric(
        PreProcess.getDF(Path(pre_process['file_path']))['class'])

    selected_df = FeatureReduction.getSelectedFeatures(reduced_df,
                                                       int(n_features),
                                                       labels)

    file_name = pre_process['file_name']
    user_dir = USER_PATH / str(g.user["id"])
    PreProcess.saveDF(selected_df, user_dir / ('GeNet_' + file_name))

    # remove old files
    stale_files = [
        prefix + file_name
        for prefix in ("merge_", "avg_symbol_", "_p_fold_", "fr_")
    ]
    remove_files(user_dir / "tmp", stale_files)

    session[file_name] = None

    return redirect('/fs/?id=' + str(pre_process_id))
Exemple #2
0
def norm():
    """Apply scaling and imputation (step 3) and render the step-4 preview.

    Expects 'norm_mthd', 'null_rmv' and 'id' in the POSTed form; anything
    missing, or an unknown pre-process id, redirects back to /pre.
    """
    norm_method = request.form.get("norm_mthd")
    null_rmv = request.form.get("null_rmv")
    pre_process_id = request.form.get("id")

    if not (norm_method and null_rmv and pre_process_id):
        return redirect('/pre')

    pre_process = UserData.get_preprocess_from_id(pre_process_id)
    if pre_process is None:
        return redirect('/pre')

    user_id = pre_process['user_id']
    file_name = pre_process['file_name']

    # Record the chosen scaling and imputation methods for this dataset.
    UserData.update_preprocess(user_id, file_name, 'scaling', norm_method)
    UserData.update_preprocess(user_id, file_name, 'imputation', null_rmv)

    if pre_process['merge_df_path'] == '':
        # No merged dataframe (annotation step skipped): start from the
        # raw upload, drop the class column and transpose so probes
        # become rows, then expose the former index as a column.
        raw_df = PreProcess.getDF(Path(pre_process['file_path']))
        df = raw_df.drop(['class'], axis=1).T.reset_index()
    else:
        df = PreProcess.getDF(Path(pre_process['merge_df_path']))

    df = PreProcess.step3(df, norm_method, null_rmv)  # symbol_df

    avg_symbol_name = "avg_symbol_" + file_name
    avg_symbol_df_path = USER_PATH / str(
        g.user["id"]) / "tmp" / avg_symbol_name
    avg_symbol_df_path_str = avg_symbol_df_path.as_posix()
    PreProcess.saveDF(df, avg_symbol_df_path_str)

    UserData.update_preprocess(user_id, file_name, 'avg_symbol_df_path',
                               avg_symbol_df_path_str)

    # Refresh the cached per-file details with the post-step-3 stats.
    data = PreProcess.add_details_json(session[file_name], df, "r1")
    session[file_name] = data

    # Preview: cap the rendered table at 100 columns x 15 rows.
    df_view = (df.iloc[:, 0:100] if len(df.columns) > 100 else df).head(15)

    return render_template("preprocess/step-4.html",
                           tablesstep4=[df_view.to_html(classes='data')],
                           details=data,
                           pre_process_id=pre_process_id,
                           file_name=avg_symbol_name)
Exemple #3
0
def get_feature_count_pval():
    """Return, as a string, how many features survive the p-value and
    fold-change thresholds supplied in the query string."""
    pvalue = request.args.get("pvalue")
    fold_change = request.args.get("foldChange")
    pre_process_id = request.args.get("id")

    pre_process = UserData.get_preprocess_from_id(pre_process_id)

    # p-fold dataframe cached in the user's tmp folder by an earlier step.
    p_fold_path = USER_PATH / str(
        g.user["id"]) / 'tmp' / ('_p_fold_' + pre_process['file_name'])
    p_fold_df = PreProcess.getDF(p_fold_path)

    count = PreProcess.get_filtered_df_count_pvalue(p_fold_df, float(pvalue),
                                                    float(fold_change))
    return str(count)
Exemple #4
0
def scaling_imputation():
    """Render the step-3 (scaling/imputation) page when cached details
    exist for this pre-process; otherwise redirect back to /pre."""
    pre_process_id = request.args.get("id")
    pre_process = UserData.get_preprocess_from_id(pre_process_id)
    if pre_process is None:
        return redirect('/pre')

    details = session.get(pre_process['file_name'])
    if details is None:
        return redirect('/pre')

    return render_template("preprocess/step-3.html",
                           details=details,
                           pre_process_id=pre_process_id)
Exemple #5
0
def feature_reduction():
    """Build the p-value / fold-change dataframe and render step 5.

    Uses the averaged-symbol dataframe when steps 2-4 were run, otherwise
    derives p/fold values straight from the uploaded file. Saves the
    result to the user's tmp folder, stores the volcano-plot hash in the
    pre-process record and renders step-5 with candidate cut-off grids.

    Fix: dropped the dead local `pvalues_max` (computed but never used).
    """
    pre_process_id = request.args.get("id")
    pre_process = UserData.get_preprocess_from_id(pre_process_id)

    if pre_process is None:
        return redirect('/pre')

    if pre_process['avg_symbol_df_path']:
        avg_symbol_df_path = Path(pre_process['avg_symbol_df_path'])
        file_path = Path(pre_process['file_path'])

        p_fold_df = PreProcess.get_pvalue_fold_df(avg_symbol_df_path,
                                                  file_path)
    else:
        # From step1: annotation/normalisation steps were skipped.
        file_path = Path(pre_process['file_path'])
        p_fold_df = PreProcess.get_pvalue_fold_df(file_path)

    p_fold_df_path = USER_PATH / str(
        g.user["id"]) / 'tmp' / ('_p_fold_' + pre_process['file_name'])
    PreProcess.saveDF(p_fold_df, p_fold_df_path)

    # Candidate cut-offs offered in the UI: a fixed p-value grid and a
    # fold grid scaled to 20% of the observed maximum fold change.
    fold_max = p_fold_df['fold'].max() * 0.2

    pvalues = np.linspace(0.001, 0.01, 19)
    pvalues = np.around(pvalues, decimals=4)
    folds = np.linspace(0.001, fold_max, 40)
    folds = np.around(folds, decimals=4)

    data_array = [pvalues, folds]

    volcano_hash = get_volcano_fig(p_fold_df['fold'], p_fold_df['pValues'])
    UserData.update_preprocess(pre_process['user_id'],
                               pre_process['file_name'], 'volcano_hash',
                               volcano_hash)

    return render_template("preprocess/step-5.html",
                           data_array=data_array,
                           volcano_hash=volcano_hash,
                           pre_process_id=pre_process_id)
Exemple #6
0
def get_reduce_features_from_pvalues():
    """Filter features by p-value / fold-change and render step 6.

    Reads 'fold-range', 'p-value' and 'id' from the POSTed form, filters
    the cached p-fold dataframe, saves the reduced dataframe ('fr_' file),
    runs the classifier comparison and renders the step-6 template.
    """
    fold = request.form["fold-range"]
    pvalue = request.form["p-value"]
    pre_process_id = request.form["id"]

    pre_process = UserData.get_preprocess_from_id(pre_process_id)

    # p-fold dataframe cached in the user's tmp folder by the previous step.
    p_fold_df_path = USER_PATH / str(
        g.user["id"]) / 'tmp' / ('_p_fold_' + pre_process['file_name'])
    p_fold_df = PreProcess.getDF(p_fold_df_path)

    if pre_process['avg_symbol_df_path']:
        df = PreProcess.get_filtered_df_pvalue(
            p_fold_df, pre_process['avg_symbol_df_path'], float(pvalue),
            float(fold))
    else:
        # From step1 skip
        df = PreProcess.get_filtered_df_pvalue(p_fold_df,
                                               pre_process['file_path'],
                                               float(pvalue), float(fold), 0)

    # Persist the feature-reduced dataframe for the following step.
    fr_df_path = USER_PATH / str(
        g.user["id"]) / 'tmp' / ('fr_' + pre_process['file_name'])
    PreProcess.saveDF(df, fr_df_path)

    length = len(df.columns)

    # Candidate feature-count choices offered in the UI, keyed off how
    # many features survived the filter.
    if length <= 150:
        split_array = np.array([length])
    elif length < 350:
        split_array = np.arange(150, int(length / 10) * 10, 10)
    else:
        split_array = np.linspace(150, 350, 21)

    split_array = split_array.astype(int)

    # Get classification Results
    df_y = PreProcess.getDF(Path(pre_process['file_path']))
    y = df_y['class']
    y = pd.to_numeric(y)

    classification_result_df = FeatureReduction.get_classification_results(
        df, y)
    cls_id, cls_name = FeatureReduction.get_best_cls(classification_result_df)

    # Shape the results table for display: drop training accuracy, sort
    # descending by testing accuracy, and use the classifier name as an
    # unnamed index.
    classification_result_df = classification_result_df.drop(['Training'],
                                                             axis=1)
    classification_result_df = classification_result_df.sort_values(
        by=['Testing'], ascending=False)
    classification_result_df = classification_result_df.set_index(
        ['Classifiers'])
    classification_result_df.index.name = None
    classification_result_df = classification_result_df.rename(
        columns={"Testing": "Testing Accuracy /%"})

    fs_fig_hash = get_feature_selection_fig(df, df_y, length)

    # Record the reduced dataframe location and the best classifier id.
    UserData.update_preprocess(pre_process['user_id'],
                               pre_process['file_name'], 'reduce_df_path',
                               fr_df_path.as_posix())
    UserData.update_preprocess(pre_process['user_id'],
                               pre_process['file_name'], 'classifiers', cls_id)

    return render_template(
        "preprocess/step-6.html",
        split_array=split_array,
        fs_fig_hash=fs_fig_hash,
        tables=[classification_result_df.to_html(classes='data')],
        cls_names=cls_name,
        pre_process_id=pre_process_id)
Exemple #7
0
def view_merge_df():
    """Merge an uploaded dataset with an annotation table (step 2).

    Two entry modes:
      * POSTed form with 'anno_tbl', 'column_selection' and
        'available_files': perform the merge (optionally ingesting a
        user-supplied annotation CSV when 'other' is chosen), cache the
        merged dataframe and render the step-2 preview.
      * 'id' query argument only: reload a previously merged dataframe
        for viewing.
    Anything else redirects back to /pre.
    """
    id = request.args.get("id")  # NOTE(review): shadows builtin `id`

    user_id = g.user["id"]
    annotation_table = request.form.get("anno_tbl")
    col_sel_method = request.form.get("column_selection")
    file_name = request.form.get("available_files")

    if annotation_table and col_sel_method and file_name:

        file_path = USER_PATH / str(user_id) / file_name

        # Delete query if file already pre-processed
        UserData.delete_preprocess_file(user_id, file_name)

        if annotation_table == 'other':
            # User supplied their own annotation table as an upload.
            file = request.files['chooseFile']

            if file and allowed_file(file.filename):

                annotation_table = secure_filename(file.filename)
                path_csv = ANNOTATION_TBL / "other" / (str(user_id) + "_" +
                                                       annotation_table)

                # Delete same file uploaded
                result = UserData.get_user_file_by_file_name(
                    user_id, annotation_table)

                # Only the first two columns of the uploaded CSV are read;
                # they must be exactly "ID" and "Gene Symbol".
                annotation_df = pd.read_csv(file, usecols=[0, 1], header=0)
                col = annotation_df.columns

                if "ID" in col and "Gene Symbol" in col and len(col) == 2:
                    annotation_df.to_csv(path_csv, index=False)

                else:
                    flash(
                        "Wrong Format: Gene Symbol and/or ID column not found in annotation table."
                    )
                    return redirect('/pre')

            else:
                return abort(403)

            # `result` is always bound here: the invalid-upload branch
            # returned abort(403) above.
            df = PreProcess.mergeDF(file_path, path_csv)

            # Register the uploaded annotation table once per user.
            if result is None:
                view_path = "/AnnotationTbls/other/" + str(
                    user_id) + "_" + annotation_table
                UserData.add_file(annotation_table,
                                  annotation_table.split('.')[1], view_path,
                                  user_id, 1, 0)

        else:
            # load df
            # NOTE(review): plain string concatenation — presumably
            # `annotation_table` starts with a path separator; verify.
            annotation_table_path = UPLOAD_FOLDER.as_posix() + annotation_table
            df = PreProcess.mergeDF(file_path, Path(annotation_table_path))

        if df is None:
            flash("Couldn't merge dataset with annotation table")
            return redirect('/pre')

        y = PreProcess.getDF(file_path)
        if 'class' not in y.columns:
            flash("Wrong Format: class column not found.")
            return redirect('/pre')

        y = y['class']
        data = PreProcess.get_df_details(df, y)

        session[file_name] = data

        # Drop rows without a gene symbol, then collapse probe-level rows
        # to symbol-level via the chosen column-selection method.
        df = df.dropna(axis=0, subset=['Gene Symbol'])
        df = PreProcess.probe2Symbol(df, int(col_sel_method))

        merge_name = "merge_" + file_name
        merge_path = USER_PATH / str(user_id) / "tmp" / merge_name
        merge_path_str = merge_path.as_posix()
        PreProcess.saveDF(df, merge_path_str)

        # save data to the Database
        UserData.add_preprocess(user_id, file_name, file_path.as_posix(),
                                annotation_table, col_sel_method,
                                merge_path_str, 0)
        pre_process_id = UserData.get_user_preprocess(user_id, file_name)['id']

        # df = df.sort_values(df.columns[0], ascending=False)
        df = df.set_index([df.columns[0]])
        df.columns.name = df.index.name
        df.index.name = None

        # Preview: cap the rendered table at 100 columns x 15 rows.
        if len(df.columns) > 100:
            df_view = df.iloc[:, 0:100].head(15)
        else:
            df_view = df.head(15)

        return render_template("preprocess/step-2.html",
                               tables=[df_view.to_html(classes='data')],
                               details=data,
                               pre_process_id=pre_process_id,
                               file_name=merge_name)

    elif id:
        # View-only mode: reload a previously merged dataframe.
        pre_process = UserData.get_preprocess_from_id(id)

        if pre_process and pre_process['merge_df_path']:
            merge_name = "merge_" + pre_process['file_name']
            merge_path = Path(pre_process['merge_df_path'])
            df = PreProcess.getDF(merge_path)

            data = session[pre_process['file_name']]
            print(data)  # NOTE(review): leftover debug print
            df = df.set_index([df.columns[0]])
            df.columns.name = df.index.name
            df.index.name = None

            if len(df.columns) > 100:
                df_view = df.iloc[:, 0:100].head(15)
            else:
                df_view = df.head(15)

            return render_template("preprocess/step-2.html",
                                   tables=[df_view.to_html(classes='data')],
                                   details=data,
                                   pre_process_id=id,
                                   file_name=merge_name)

    return redirect('/pre')