def flex_vis_2d_handler(request):
    """
    AJAX endpoint that renders a 2D visualization for a saved model.

    Reads the model/data/detail file names from a validated ``FlexViewForm``
    (GET), verifies that each file resolves inside the storage base location,
    and returns the Bokeh plot components as JSON.  On any failure the JSON
    payload carries ``msg_error`` instead of ``bokeh_plot``.
    """
    resp_data = dict()
    form = FlexViewForm(request.GET)
    if form.is_valid():
        model_file_name = form.cleaned_data['model_file_name']
        data_file_name = form.cleaned_data['data_file_name']
        data_detail_file_name = form.cleaned_data['data_detail_file_name']
        fs = FileStorage()
        # All three files must live under the storage base location; this also
        # guards against path traversal via user-supplied file names.
        if fs.is_file_in_base_location(model_file_name) and \
           fs.is_file_in_base_location(data_file_name) and \
           fs.is_file_in_base_location(data_detail_file_name):
            plot = process_model_data(model_file_name, data_file_name,
                                      data_detail_file_name)
            resp_data['bokeh_plot'] = plot
        else:
            resp_data['msg_error'] = "File(s) is invalid."
    else:
        # Use the public ``form.errors`` API instead of the private
        # ``form._errors`` attribute; after is_valid() both hold the same
        # ErrorDict, but only ``errors`` is a supported interface.
        resp_data['msg_error'] = escape(form.errors)
    return JsonResponse(resp_data)
def elbow_plot_handler_old(request):
    """
    AJAX endpoint that builds an elbow-curve plot (PCA explained-variance
    ratio) for a previously uploaded data file.

    GET params:
        file_name       -- name of the data file under the storage base dir.
        column_header   -- "on" when the first row holds column names.
        exclude_columns -- comma-separated 1-based column indices to drop.

    Returns:
        JsonResponse whose payload has either ``bokeh_plot``
        ({'script': ..., 'div': ...}) or an error ``msg``.
    """
    resp_data = dict()
    file_name = request.GET.get("file_name")
    column_header = request.GET.get("column_header")
    exclude_columns = request.GET.get("exclude_columns")
    if file_name:
        fs = FileStorage()
        file_full_path = fs.get_base_location() + file_name
        # If the file exists, read it with pandas and drop columns (if any).
        if fs.is_file(file_full_path):
            # Header row index: 0 when the checkbox was on, else no header.
            column_header_idx = 0 if column_header == "on" else None
            df = DataFrameUtil.convert_file_to_dataframe(file_full_path,
                                                         header=column_header_idx)
            # Drop the columns specified by the user (UI indices are 1-based).
            if exclude_columns:
                str_column_indexs = exclude_columns.split(",")
                column_indexs = [int(i) - 1 for i in str_column_indexs]
                df = DataFrameUtil.drop_column_by_index(df, column_indexs)
            # PCA cannot handle NaN/inf values; fail fast with a readable
            # message instead of letting the fit raise a server error.
            # (The original computed these flags but never used them.)
            if np.any(np.isnan(df)) or not np.all(np.isfinite(df)):
                resp_data["msg"] = "[ERROR] Data contains NaN or non-finite values."
                return JsonResponse(resp_data)
            # Standardize data, fit PCA, and plot the explained-variance
            # ratios as an elbow curve.
            X_scaled = PreProcessingUtil.standardize(df)
            pca_helper = PcaUtil()
            pca = pca_helper.get_fit_transfrom_pca(X_scaled)
            arr_variance_ratio = pca.explained_variance_ratio_
            elbow_plot = draw_elbow_plot(arr_variance_ratio)
            # Wrap the plot in a Bokeh tab and serialize to embeddable parts.
            tab1 = Panel(child=elbow_plot, title="Elbow Curve Plot")
            tabs = Tabs(tabs=[tab1])
            script, div = components(tabs)
            resp_data["bokeh_plot"] = {'script': script, 'div': div}
        else:
            resp_data["msg"] = "[ERROR] File is not found."
    else:
        resp_data['msg'] = "[ERROR] File name is invalid."
    return JsonResponse(resp_data)
def process_model_data(model_file_name, data_file_name, data_detail_file_name,
                       n_components=100):
    """
    Run a saved model over a new data file and build a 2D Bokeh scatter plot.

    Args:
        model_file_name: name of the persisted model file to load.
        data_file_name: file holding the raw feature data to predict on
            (no header row).
        data_detail_file_name: file whose rows describe each data row,
            matched by row index (has a header row).
        n_components: number of principal components fed to the model;
            defaults to 100, the value previously hard-coded (TODO: drive
            this from the stored model/DB).

    Returns:
        dict with the Bokeh 'script' and 'div' components of the plot.
    """
    fs = FileStorage()
    # Dataframe of new data to process (apart from training); no header row.
    df_data = DataFrameUtil.convert_file_to_dataframe(
        fs.get_full_path(data_file_name), header=None)
    # Dataframe used to match processed rows by index and show detail.
    df_data_detail = DataFrameUtil.convert_file_to_dataframe(
        fs.get_full_path(data_detail_file_name), header=0)
    # Load the persisted model.  TODO: load from DB and make dynamic.
    model = ModelUtils.load_model(model_file_name)
    logger.debug("Dimensionality Reduction by PCA...")
    pca_helper = PcaUtil()
    # Standardize, then reduce to the dimensionality the model expects.
    X_scaled = PreProcessingUtil.fit_transform(df_data)
    X_reduced = pca_helper.get_pc(X_scaled, n_components=n_components)
    pred_y = model.predict(X_reduced)
    df_label = pd.DataFrame(pred_y, columns=["Label"])
    # A separate 2-component projection is used purely for plotting.
    X_graph = pca_helper.get_pc(X_scaled, n_components=2)
    df_points = pd.DataFrame(X_graph, columns=['PC1', 'PC2'])
    df_graph = df_label.join(df_points)
    # Detail rows are matched to plotted points by row index in draw_2d().
    script, div = draw_2d(df_graph, df_data_detail)
    return {'script': script, 'div': div}
def process_data(dataset_file, data_file, label_file, form):
    """
    Persist an uploaded dataset and (eventually) clean it per form settings.

    NOTE(review): this function is an unfinished stub — the upload-type
    branches and the NaN clean-up branches are placeholders, the saved file
    is never read back into a dataframe, and nothing is returned.

    Args:
        dataset_file: uploaded dataset file object to store.
        data_file: uploaded data file (currently unused).
        label_file: uploaded label file (currently unused).
        form: upload-settings form providing separator/clean-up options.
    """
    data_separator = form.data_separator    # "single_space" | "tab" | anything else -> comma
    ds_label_related = form.ds_label_related    # currently unused
    label_col_idx = form.label_col_idx    # currently unused
    nan_cleanup = form.nan_cleanup    # "mean" | "median" | "delete"
    column_names = []
    separator = ","
    if form.upload_type == UploadFileConst.UPLOAD_DATA_LABEL_SEPARATE:
        # NOTE(review): bare attribute access — has no effect beyond raising
        # AttributeError if missing; presumably meant to read/use the key
        # column index.  TODO confirm intent.
        form.key_column_idx
    elif form.upload_type == UploadFileConst.UPLOAD_DATA_LABEL:
        pass
    elif form.upload_type == UploadFileConst.UPLOAD_DATA_ONLY:
        pass
    fs = FileStorage()
    # Only real side effect so far: the dataset file is saved to storage.
    file_name = fs.save_file(dataset_file)
    # ProcessDataset.process_data(file_name, form)
    file_url = "temp"
    # Map the form's separator choice onto the actual delimiter character.
    if data_separator == "single_space":
        separator = " "
    elif data_separator == "tab":
        separator = "\t"
    # read file
    # df = df_util.convert_to_dataframe(file_path, column_names, separator)
    # data clean up
    if nan_cleanup == "mean":
        # find mean of each column and replace NaN
        pass
    elif nan_cleanup == "median":
        # find median of each column and replace NaN
        pass
    elif nan_cleanup == "delete":
        # delete NaN rows
        # df_util.drop_na(df, columns_names)
        pass
from django.utils.html import escape from django.views.decorators.csrf import csrf_exempt from data_preprocess.forms import UploadFileForm, CleanUpSettingsForm, ProcessFileForm, SaveFileForm, UploadFileAsForm from data_preprocess.logic import ProcessDataset from naphyutils.file import FileStorage from naphyutils.dataframe import DataFrameUtil import simplejson as json import pandas as pd from .forms import ExtractMatchedKeysForm import constants.const_msg as msg fs = FileStorage() # ====== Upload Menu ===== def init_data_upload_handler(request): """ Forward to main page of data management module. """ return render(request, template_name='upload.html') @csrf_exempt def upload_file_as_handler(request): """ Forward to main page of data management module.