Ejemplo n.º 1
0
def flex_vis_2d_handler(request):
    """Validate a flexible-view request and return a 2D Bokeh plot as JSON.

    Expects ``model_file_name``, ``data_file_name`` and
    ``data_detail_file_name`` in the GET query string. On success the
    response carries the plot components under ``bokeh_plot``; otherwise
    an error message is returned under ``msg_error``.
    """
    response = {}
    form = FlexViewForm(request.GET)

    # Reject malformed requests up front.
    if not form.is_valid():
        response['msg_error'] = escape(form._errors)
        return JsonResponse(response)

    model_file_name = form.cleaned_data['model_file_name']
    data_file_name = form.cleaned_data['data_file_name']
    data_detail_file_name = form.cleaned_data['data_detail_file_name']

    storage = FileStorage()
    all_files_present = all(
        storage.is_file_in_base_location(name)
        for name in (model_file_name, data_file_name, data_detail_file_name)
    )

    if all_files_present:
        response['bokeh_plot'] = process_model_data(
            model_file_name, data_file_name, data_detail_file_name)
    else:
        response['msg_error'] = "File(s) is invalid."

    return JsonResponse(response)
Ejemplo n.º 2
0
def elbow_plot_handler_old(request):
    """Build an elbow-curve plot of PCA explained-variance ratios for a file.

    GET parameters:
        file_name       -- name of the data file inside the storage base location.
        column_header   -- "on" when the first row of the file is a header.
        exclude_columns -- optional comma-separated, 1-based column indexes to drop.

    Returns a JsonResponse with either ``bokeh_plot`` (script/div components)
    or an error message under ``msg``.
    """
    resp_data = {}
    file_name = request.GET.get("file_name")
    column_header = request.GET.get("column_header")
    exclude_columns = request.GET.get("exclude_columns")

    # Guard clauses keep the happy path flat.
    if not file_name:
        resp_data['msg'] = "[ERROR] File name is invalid."
        return JsonResponse(resp_data)

    fs = FileStorage()
    file_full_path = fs.get_base_location() + file_name

    if not fs.is_file(file_full_path):
        resp_data["msg"] = "[ERROR] File is not found."
        return JsonResponse(resp_data)

    # Header row index: 0 when the user marked the first row as a header,
    # otherwise None (no header row).
    column_header_idx = 0 if column_header == "on" else None
    df = DataFrameUtil.convert_file_to_dataframe(file_full_path,
                                                 header=column_header_idx)

    # Drop columns the user excluded (request indexes are 1-based).
    if exclude_columns:
        column_indexes = [int(i) - 1 for i in exclude_columns.split(",")]
        df = DataFrameUtil.drop_column_by_index(df, column_indexes)

    # Standardize data before PCA.
    X_scaled = PreProcessingUtil.standardize(df)

    # Explained-variance ratio per principal component.
    pca_helper = PcaUtil()
    pca = pca_helper.get_fit_transfrom_pca(X_scaled)
    arr_variance_ratio = pca.explained_variance_ratio_

    # Render the ratio as an elbow curve inside a single Bokeh tab.
    elbow_plot = draw_elbow_plot(arr_variance_ratio)
    tab1 = Panel(child=elbow_plot, title="Elbow Curve Plot")
    tabs = Tabs(tabs=[tab1])

    script, div = components(tabs)
    resp_data["bokeh_plot"] = {'script': script, 'div': div}

    return JsonResponse(resp_data)
Ejemplo n.º 3
0
def process_model_data(model_file_name, data_file_name, data_detail_file_name):
    """Classify new data with a saved model and build a 2D PCA scatter plot.

    Reads the data file (no header row) and its detail file (header row),
    loads the persisted model, predicts labels on a 100-component PCA
    projection, then re-projects to 2 components for plotting.

    Returns a dict with the Bokeh 'script' and 'div' components.
    """
    storage = FileStorage()

    # New data to process; it is separate from the training data and,
    # for now, has no header row.  TODO change
    df_data = DataFrameUtil.convert_file_to_dataframe(
        storage.get_full_path(data_file_name), header=None)

    # Detail rows matched to the processed data by index; first row is a header.
    df_data_detail = DataFrameUtil.convert_file_to_dataframe(
        storage.get_full_path(data_detail_file_name), header=0)

    # Load the persisted model.
    model = ModelUtils.load_model(model_file_name)

    # TODO!!!!!! change to DB and dynamic
    logger.debug("Dimensionality Reduction by PCA...")
    pca_helper = PcaUtil()

    # Standardize, then reduce dimensionality before prediction.
    X_scaled = PreProcessingUtil.fit_transform(df_data)

    # TODO change n =100 to dynamic
    X_reduced = pca_helper.get_pc(X_scaled, n_components=100)
    predicted_labels = model.predict(X_reduced)
    df_label = pd.DataFrame(predicted_labels, columns=["Label"])

    # Re-project to two components so the result can be drawn in 2D.
    X_graph = pca_helper.get_pc(X_scaled, n_components=2)
    df_points = pd.DataFrame(X_graph, columns=['PC1', 'PC2'])

    # Join predicted labels with the 2D coordinates, row by row.
    df_graph = df_label.join(df_points)
    script, div = draw_2d(df_graph, df_data_detail)

    return {'script': script, 'div': div}
Ejemplo n.º 4
0
    def process_data(dataset_file, data_file, label_file, form):
        """Save an uploaded dataset and resolve separator/cleanup settings.

        NOTE(review): this is largely a work-in-progress stub — the
        upload-type branches and the NaN clean-up strategies are not yet
        implemented, and ``data_file``/``label_file`` are currently unused.

        Parameters:
            dataset_file -- uploaded dataset file object to persist.
            data_file    -- uploaded data file (unused placeholder).
            label_file   -- uploaded label file (unused placeholder).
            form         -- validated form carrying the upload settings.
        """
        data_separator = form.data_separator
        ds_label_related = form.ds_label_related
        label_col_idx = form.label_col_idx
        nan_cleanup = form.nan_cleanup
        column_names = []
        separator = ","

        if form.upload_type == UploadFileConst.UPLOAD_DATA_LABEL_SEPARATE:
            # Fixed: the original evaluated `form.key_column_idx` as a bare
            # expression with no effect; bind it for the (future) key-join logic.
            key_column_idx = form.key_column_idx

        elif form.upload_type == UploadFileConst.UPLOAD_DATA_LABEL:
            pass

        elif form.upload_type == UploadFileConst.UPLOAD_DATA_ONLY:
            pass

        # Persist the uploaded dataset.
        fs = FileStorage()
        file_name = fs.save_file(dataset_file)
        # ProcessDataset.process_data(file_name, form)
        file_url = "temp"

        # Map the user's separator choice to the literal character.
        if data_separator == "single_space":
            separator = " "
        elif data_separator == "tab":
            separator = "\t"
        # read file
        # df = df_util.convert_to_dataframe(file_path, column_names, separator)

        # NaN clean-up strategy (not yet implemented).
        if nan_cleanup == "mean":
            # find mean of each column and replace NaN
            pass
        elif nan_cleanup == "median":
            # find median of each column and replace NaN
            pass
        elif nan_cleanup == "delete":
            # delete NaN rows
            # df_util.drop_na(df, columns_names)
            pass
from django.utils.html import escape
from django.views.decorators.csrf import csrf_exempt

from data_preprocess.forms import UploadFileForm, CleanUpSettingsForm, ProcessFileForm, SaveFileForm, UploadFileAsForm
from data_preprocess.logic import ProcessDataset
from naphyutils.file import FileStorage
from naphyutils.dataframe import DataFrameUtil
import simplejson as json

import pandas as pd

from .forms import ExtractMatchedKeysForm

import constants.const_msg as msg

# Module-level storage helper shared by the handlers in this module.
fs = FileStorage()
    
# ====== Upload Menu =====

  
def init_data_upload_handler(request):
    """Render the landing page of the data-upload module."""
    return render(request, template_name='upload.html')

     
@csrf_exempt    
def upload_file_as_handler(request):
    """
    Forward to main page of data management module.