Exemple #1
0
def feature_reduction():
    """Render step 5 of the pre-processing flow (p-value / fold thresholds).

    Reads the ``id`` query argument and looks up the matching pre-process
    record. The p-value/fold frame is built with
    ``PreProcess.get_pvalue_fold_df``, saved as ``_p_fold_<file_name>`` in the
    current user's ``tmp`` directory, and plotted with ``get_volcano_fig``;
    the resulting figure hash is written back to the record through
    ``UserData.update_preprocess``.

    Returns:
        A redirect to ``/pre`` when no record matches the id, otherwise the
        rendered ``preprocess/step-5.html`` template.
    """
    pre_process_id = request.args.get("id")
    pre_process = UserData.get_preprocess_from_id(pre_process_id)

    if pre_process is None:
        return redirect('/pre')

    # Both branches need the path stored under 'file_path'.
    file_path = Path(pre_process['file_path'])

    if pre_process['avg_symbol_df_path']:
        avg_symbol_df_path = Path(pre_process['avg_symbol_df_path'])
        p_fold_df = PreProcess.get_pvalue_fold_df(avg_symbol_df_path,
                                                  file_path)
    else:
        # From step1: no averaged-symbol frame was recorded, so only the
        # file path is passed.
        p_fold_df = PreProcess.get_pvalue_fold_df(file_path)

    p_fold_df_path = USER_PATH / str(
        g.user["id"]) / 'tmp' / ('_p_fold_' + pre_process['file_name'])
    PreProcess.saveDF(p_fold_df, p_fold_df_path)

    # The fold candidates scale with the data (20% of the largest fold);
    # the p-value candidates use a fixed 0.001-0.01 range.
    fold_max = p_fold_df['fold'].max() * 0.2

    pvalues = np.around(np.linspace(0.001, 0.01, 19), decimals=4)
    folds = np.around(np.linspace(0.001, fold_max, 40), decimals=4)

    # Candidate threshold values handed to the template
    # (presumably the selectable p-value / fold options -- confirm in the
    # step-5 template).
    data_array = [pvalues, folds]

    volcano_hash = get_volcano_fig(p_fold_df['fold'], p_fold_df['pValues'])
    UserData.update_preprocess(pre_process['user_id'],
                               pre_process['file_name'], 'volcano_hash',
                               volcano_hash)

    return render_template("preprocess/step-5.html",
                           data_array=data_array,
                           volcano_hash=volcano_hash,
                           pre_process_id=pre_process_id)
Exemple #2
0
def norm():
    """Run the normalisation step and render step 4 of the pre-processing flow.

    Reads ``norm_mthd``, ``null_rmv`` and ``id`` from the posted form. If any
    of them is missing/empty, or no pre-process record matches the id, the
    request is redirected to ``/pre``. Otherwise the chosen options are
    recorded, the input frame is passed through ``PreProcess.step3``, the
    result is saved as ``avg_symbol_<file_name>`` in the current user's
    ``tmp`` directory, and a preview of at most 15 rows by 100 columns is
    rendered.
    """
    form = request.form
    norm_method = form.get("norm_mthd")
    null_rmv = form.get("null_rmv")
    pre_process_id = form.get("id")

    # Guard: every form field is required.
    if not (norm_method and null_rmv and pre_process_id):
        return redirect('/pre')

    pre_process = UserData.get_preprocess_from_id(pre_process_id)
    if pre_process is None:
        return redirect('/pre')

    user_id = pre_process['user_id']
    file_name = pre_process['file_name']

    # Record the chosen options on the pre-process record.
    UserData.update_preprocess(user_id, file_name, 'scaling', norm_method)
    UserData.update_preprocess(user_id, file_name, 'imputation', null_rmv)

    if pre_process['merge_df_path'] != '':
        frame = PreProcess.getDF(Path(pre_process['merge_df_path']))
    else:
        # No merged frame recorded: start from the original file, drop the
        # 'class' column, transpose, and turn the index into a column.
        frame = PreProcess.getDF(Path(pre_process['file_path']))
        frame = frame.drop(['class'], axis=1).T.reset_index()

    frame = PreProcess.step3(frame, norm_method, null_rmv)  # symbol_df

    avg_symbol_name = "avg_symbol_" + file_name
    target = USER_PATH / str(g.user["id"]) / "tmp" / avg_symbol_name
    target_str = target.as_posix()

    PreProcess.saveDF(frame, target_str)
    UserData.update_preprocess(user_id, file_name, 'avg_symbol_df_path',
                               target_str)

    # Update the per-file details kept in the session.
    details = PreProcess.add_details_json(session[file_name], frame, "r1")
    session[file_name] = details

    # Preview: first 15 rows, capped at the first 100 columns.
    too_wide = len(frame.columns) > 100
    preview = (frame.iloc[:, 0:100] if too_wide else frame).head(15)

    return render_template("preprocess/step-4.html",
                           tablesstep4=[preview.to_html(classes='data')],
                           details=details,
                           pre_process_id=pre_process_id,
                           file_name=avg_symbol_name)
Exemple #3
0
def get_reduce_features_from_pvalues():
    """Apply the submitted p-value / fold thresholds and render step 6.

    Reads ``fold-range``, ``p-value`` and ``id`` from the posted form, loads
    the ``_p_fold_<file_name>`` frame from the current user's ``tmp``
    directory and filters it with ``PreProcess.get_filtered_df_pvalue``. The
    filtered frame is saved as ``fr_<file_name>``, classifiers are evaluated
    on it through ``FeatureReduction.get_classification_results``, and the
    reduced-frame path plus the best classifier id are written back with
    ``UserData.update_preprocess``.

    Returns:
        A redirect to ``/pre`` when no pre-process record matches the id,
        otherwise the rendered ``preprocess/step-6.html`` template.

    Raises:
        ValueError: if a submitted threshold cannot be converted to float.
    """
    fold_raw = request.form["fold-range"]
    pvalue_raw = request.form["p-value"]
    pre_process_id = request.form["id"]

    pre_process = UserData.get_preprocess_from_id(pre_process_id)

    # Same handling as the other pre-processing views: an unknown id sends
    # the user back to the start instead of failing on a None subscript.
    if pre_process is None:
        return redirect('/pre')

    # Convert once, before any file is read.
    pvalue = float(pvalue_raw)
    fold = float(fold_raw)

    user_tmp_dir = USER_PATH / str(g.user["id"]) / 'tmp'
    file_name = pre_process['file_name']

    p_fold_df_path = user_tmp_dir / ('_p_fold_' + file_name)
    p_fold_df = PreProcess.getDF(p_fold_df_path)

    if pre_process['avg_symbol_df_path']:
        df = PreProcess.get_filtered_df_pvalue(
            p_fold_df, pre_process['avg_symbol_df_path'], pvalue, fold)
    else:
        # From step1 skip: filter the original file; the extra trailing 0 is
        # passed through as in the original call (its meaning is defined by
        # PreProcess.get_filtered_df_pvalue).
        df = PreProcess.get_filtered_df_pvalue(p_fold_df,
                                               pre_process['file_path'],
                                               pvalue, fold, 0)

    fr_df_path = user_tmp_dir / ('fr_' + file_name)
    PreProcess.saveDF(df, fr_df_path)

    length = len(df.columns)

    # Candidate feature counts handed to the template.
    if length <= 150:
        split_array = np.array([length])
    elif length < 350:
        split_array = np.arange(150, int(length / 10) * 10, 10)
        if split_array.size == 0:
            # For 150 < length < 160 the arange stop equals its start, which
            # would leave the template without a single option.
            split_array = np.array([150])
    else:
        split_array = np.linspace(150, 350, 21)

    split_array = split_array.astype(int)

    # Get classification results; labels come from the 'class' column of
    # the original file.
    df_y = PreProcess.getDF(Path(pre_process['file_path']))
    y = pd.to_numeric(df_y['class'])

    classification_result_df = FeatureReduction.get_classification_results(
        df, y)
    cls_id, cls_name = FeatureReduction.get_best_cls(classification_result_df)

    # Shape the results for display: testing accuracy only, best first,
    # classifier names as an unnamed index.
    classification_result_df = classification_result_df.drop(['Training'],
                                                             axis=1)
    classification_result_df = classification_result_df.sort_values(
        by=['Testing'], ascending=False)
    classification_result_df = classification_result_df.set_index(
        ['Classifiers'])
    classification_result_df.index.name = None
    classification_result_df = classification_result_df.rename(
        columns={"Testing": "Testing Accuracy /%"})

    fs_fig_hash = get_feature_selection_fig(df, df_y, length)

    UserData.update_preprocess(pre_process['user_id'], file_name,
                               'reduce_df_path', fr_df_path.as_posix())
    UserData.update_preprocess(pre_process['user_id'], file_name,
                               'classifiers', cls_id)

    return render_template(
        "preprocess/step-6.html",
        split_array=split_array,
        fs_fig_hash=fs_fig_hash,
        tables=[classification_result_df.to_html(classes='data')],
        cls_names=cls_name,
        pre_process_id=pre_process_id)