def view_silhouette_analysis_handler(request):
    resp_data = dict()
    form = SilhouetteAnalysisForm(request.GET)
    if form.is_valid():
        # Read file and draw plot
        n_cluster_from = form.cleaned_data['n_cluster_from']
        n_cluster_to = form.cleaned_data['n_cluster_to']
        # Make the upper bound inclusive for the cluster-count range.
        n_cluster_to += 1
        df = get_scaled_dataframe(form)
        X_reduced = PcaUtil.reduce_dimension(df, n_components=2)
        arr_scores = KMeanUtil.get_silhouette_score(
            X_reduced, n_cluster_from, n_cluster_to, random_state=42)
        x = [item[0] for item in arr_scores]
        y = [item[1] for item in arr_scores]
        p = draw_line_chart(x, y, "Silhouette Analysis",
                            "Number of Clusters", "Silhouette Score")
        resp_data['plot'] = escape(p)
    else:
        # Return the form validation errors as escaped HTML.
        resp_data[msg.ERROR] = escape(form._errors)
    return JsonResponse(resp_data)
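# NOTE: KMeanUtil.get_silhouette_score is not defined in this module. The
# function below is a hypothetical sketch, inferred only from the call above:
# it is assumed to return (n_clusters, score) pairs over the half-open range
# [n_cluster_from, n_cluster_to), using scikit-learn's silhouette_score.
def _silhouette_scores_sketch(X, n_cluster_from, n_cluster_to, random_state=42):
    from sklearn.cluster import KMeans
    from sklearn.metrics import silhouette_score
    scores = []
    for k in range(n_cluster_from, n_cluster_to):
        # Fit k-means for each candidate k and score the resulting labels.
        labels = KMeans(n_clusters=k, random_state=random_state).fit_predict(X)
        scores.append((k, silhouette_score(X, labels)))
    return scores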
def view_kmean_analysis_handler(request):
    resp_data = dict()
    form = KMeanForm(request.GET)
    if form.is_valid():
        # Read file and draw plot
        df = get_scaled_dataframe(form)
        n_clusters = form.cleaned_data['n_clusters']
        X_reduced = PcaUtil.reduce_dimension(df, n_components=2)
        kmeans = KMeanUtil.get_kmean_model(X_reduced, n_clusters)
        # TODO delete later: debug output of the fitted cluster centers.
        print(kmeans.cluster_centers_)
        df = pd.DataFrame(data=X_reduced, columns=["x", "y"])
        df_label = pd.DataFrame(data=kmeans.labels_, columns=['label'])
        df = df.join(df_label)
        p = draw_kmean(kmeans, df)
        resp_data['plot'] = escape(p)
    else:
        # Return the form validation errors as escaped HTML.
        resp_data[msg.ERROR] = escape(form._errors)
    return JsonResponse(resp_data)
def view_mean_shift_analysis_handler(request):
    """ Process data from the selected file and generate a cluster plot. """
    resp_data = dict()
    form = DataForm(request.GET)
    # Check whether the input data matches the settings in the Form.
    if form.is_valid():
        # Get data from the request via form.cleaned_data
        # (returned as the data types specified in the Form).
        data_file_name = form.cleaned_data['data_file_name']
        column_header = form.cleaned_data['column_header']
        # Check that the file exists in storage.
        if fs.is_file_in_base_location(data_file_name):
            df = get_scaled_dataframe(form)
            # Run mean shift on the PCA-reduced data.
            X_reduced = PcaUtil.reduce_dimension(df, n_components=2)
            ms_model = MeanShiftUtil.mean_shift(X_reduced)
            # TODO delete later: debug output of the cluster centers.
            print(ms_model.cluster_centers_)
            p = draw_mean_shift(ms_model, X_reduced)
            resp_data['plot'] = escape(p)
        else:
            # File does not exist in storage.
            resp_data[msg.ERROR] = "The file does not exist in storage."
    else:
        # Return the form validation errors as escaped HTML.
        resp_data[msg.ERROR] = escape(form._errors)
    return JsonResponse(resp_data)
def view_elbow_method_handler(request):
    """ Sum of squared errors for different numbers of clusters. """
    resp_data = dict()
    form = ElbowMethodForm(request.GET)
    if form.is_valid():
        # Read file and draw plot
        df = get_scaled_dataframe(form)
        n_cluster_from = form.cleaned_data['n_cluster_from']
        n_cluster_to = form.cleaned_data['n_cluster_to']
        # Make the upper bound inclusive for the cluster-count range.
        n_cluster_to += 1
        X_reduced = PcaUtil.reduce_dimension(df, n_components=2)
        arr_sse = KMeanUtil.get_kmean_sse(
            X_reduced, n_cluster_from, n_cluster_to, random_state=42)
        x = [item[0] for item in arr_sse]
        y = [item[1] for item in arr_sse]
        p = draw_line_chart(x, y, "Elbow Method", "Number of Clusters", "SSE")
        resp_data['plot'] = escape(p)
    else:
        # Return the form validation errors as escaped HTML.
        resp_data[msg.ERROR] = escape(form._errors)
    return JsonResponse(resp_data)
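# NOTE: KMeanUtil.get_kmean_sse is not defined in this module. A hypothetical
# sketch, inferred from the call above: k-means inertia_ (the within-cluster
# sum of squared errors) collected as (n_clusters, sse) pairs.
def _kmean_sse_sketch(X, n_cluster_from, n_cluster_to, random_state=42):
    from sklearn.cluster import KMeans
    arr_sse = []
    for k in range(n_cluster_from, n_cluster_to):
        km = KMeans(n_clusters=k, random_state=random_state).fit(X)
        arr_sse.append((k, km.inertia_))  # inertia_ is the SSE for this k
    return arr_sse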
def load_model(model_name):
    # TODO change to load settings from the DB
    # model_file_name = "radiomic482_svm_ovo_model.joblib"
    # model = ModelUtils.load_model(model_file_name)
    # TODO the data below must be the trained data
    df_train = DataFrameUtil.convert_file_to_dataframe(
        fs.get_full_path("radiomic482_no_key.csv"), header=0)
    X_scaled = PreProcessingUtil.standardize(df_train)
    X_reduced = PcaUtil.reduce_dimension(X_scaled, n_components=50)
    model = KMeanUtil.get_kmean_model(X_reduced, n_clusters=5, random_state=42)
    return model
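# NOTE: The commented-out path in load_model() loads a persisted model with
# joblib instead of re-training. A minimal sketch of that alternative,
# assuming the .joblib file lives in the same base storage location:
def _load_persisted_model_sketch(model_file_name="radiomic482_svm_ovo_model.joblib"):
    import joblib
    return joblib.load(fs.get_full_path(model_file_name))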
def process():
    # TODO need to pass the model name from the DB
    model = load_model(model_name="xx")
    df_base_space = read_based_space_to_dataframe()
    X_scaled = PreProcessingUtil.standardize(df_base_space)
    X_reduced = PcaUtil.reduce_dimension(X_scaled, n_components=50)
    label = model.predict(X_reduced)
    X_2d = PcaUtil.reduce_dimension(X_scaled, n_components=2)
    # TODO add file name from the DB
    df_data_detail = read_data_detail_to_dataframe(data_file_name="")
    # Join all data into one dataframe: x, y, label, data_detail
    df_result = pd.DataFrame(data=X_2d, columns=['x', 'y'])
    df_label = pd.DataFrame(data=label, columns=['label'])
    df_result = df_result.join(df_label)
    # Add medical history result
    df_result = df_result.join(df_data_detail)
    # Add radiomic result
    df_result = df_result.join(df_base_space)
    return df_result
def pca_plot(request):
    """ Run PCA on an uploaded file and return 3D plot coordinates. """
    form = PcaPlotForm(request.POST, request.FILES)
    resp_data = dict()
    plot = dict()  # PCA 3D
    if form.is_valid():
        # Get input files
        data_file = form.cleaned_data["data_file"]
        df_input = DataFrameUtil.file_to_dataframe(data_file, header=None)
        X, pca = PcaUtil.reduce_dimension(df_input, n_components=3)
        plot['x'] = list(X[:, 0])
        plot['y'] = list(X[:, 1])
        plot['z'] = list(X[:, 2])
        resp_data['plot'] = plot
    else:
        resp_data[msg.ERROR] = escape(form._errors)
    return JsonResponse(resp_data)
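# NOTE: PcaUtil.reduce_dimension is unpacked as (X, pca) here but used as a
# single return value in the clustering handlers above. A hypothetical sketch
# matching the tuple form, assuming a thin wrapper around sklearn's PCA:
def _reduce_dimension_sketch(X, n_components):
    from sklearn.decomposition import PCA
    pca = PCA(n_components=n_components)
    return pca.fit_transform(X), pca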
def process_pipeline(arr_pipeline, X, y, parameters):
    result = dict()
    clf = None  # Model
    for p in arr_pipeline:
        if p == "sfs":
            # Sequential feature selection
            clf = feature_selection_sfs(X, y, parameters)
            if isinstance(X, pd.DataFrame):
                X = DataFrameUtil.get_columns_by_indexes(
                    X, list(clf.k_feature_idx_))
            elif isinstance(X, np.ndarray):
                X = X[:, list(clf.k_feature_idx_)]
            result["scores"] = clf.k_score_
            result['table_columns'] = ['Feature Indexes', 'Feature Names']
            # Convert the selected features to arrays for the result table
            arr_feature_indexes = list(clf.k_feature_idx_)
            arr_feature_names = list(clf.k_feature_names_)
            result['table_data'] = [arr_feature_indexes, arr_feature_names]
        elif p == "select_k_best":
            # NOTE: Input X must be non-negative for the chi2 test.
            n_k = parameters['select_k_best_n_k']
            X = SelectKBest(chi2, k=n_k).fit_transform(X, y)
        elif p == "scale":
            # Standardize data
            X = PreProcessingUtil.fit_transform(X)
        elif p == "pca":
            # Reduce dimensions and return as X.
            n_components = parameters['pca_n_components']
            pca_helper = PcaUtil()
            X = pca_helper.reduce_dimension(X, n_components)
        elif p == "kernel_pca":
            # Reduce dimensions with an RBF kernel and return as X.
            n_components = parameters['kernel_pca_n_components']
            kpca = KernelPCA(n_components=n_components, kernel='rbf', gamma=15)
            X = kpca.fit_transform(X, y)
        elif p == "lda":
            n_components = parameters['lda_n_components']
            clf = LinearDiscriminantAnalysis(n_components=n_components)
            X = clf.fit_transform(X, y)
        elif p == "tsne":
            n_components = parameters['tsne_n_components']
            clf = TSNE(n_components=n_components)
            X = clf.fit_transform(X, y)
        elif p == "svmovo":
            # Create the SVM as one-vs-one. SVC has no fit_transform;
            # fitting happens in the "handout" step or in cross_val_score.
            clf = svm.SVC(gamma='scale', decision_function_shape='ovo')
        elif p == "svmovr":
            # One-vs-rest linear SVM
            clf = svm.LinearSVC(max_iter=5000)
        elif p == "kfold":
            n_folds = parameters['n_folds']
            scores = cross_val_score(clf, X, y, cv=n_folds)
            result["scores"] = scores.tolist()
            result["accuracy_mean"] = scores.mean()
        elif p == "stratified_kfold":
            stratified_kfold_n_split = parameters['stratified_kfold_n_split']
            stratified_kfold_shuffle = parameters['stratified_kfold_shuffle']
            # TODO: the splitter is created but not yet used anywhere.
            StratifiedKFold(n_splits=stratified_kfold_n_split,
                            shuffle=stratified_kfold_shuffle,
                            random_state=42)
        elif p == "handout":
            # Hold-out split. Set random_state to get the same split on
            # every run.
            test_size = parameters['test_size']
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size, random_state=42)
            if isinstance(clf, (svm.SVC, LinearSVC)):
                clf.fit(X_train, y_train)
                y = clf.predict(X)
            else:
                # t-SNE, not SVM
                X = clf.fit_transform(X_train, y_train)
            if not isinstance(clf, TSNE):
                result["scores"] = clf.score(X_test, y_test).tolist()
    # result['X'] = X.tolist()
    # result['y'] = y
    # if p != "sfs" and clf:
    #     result["params"] = clf.get_params(deep=True)
    print(clf)
    return result, X, y, clf
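# NOTE: Hypothetical usage of process_pipeline (the step names match the
# branches above; the parameter values are illustrative only): standardize,
# reduce to 50 PCA components, build a one-vs-one SVM, score with 5-fold CV.
def _process_pipeline_usage_sketch(X, y):
    params = {"pca_n_components": 50, "n_folds": 5}
    return process_pipeline(["scale", "pca", "svmovo", "kfold"], X, y, params)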
def get_reduced_dimension_X(form):
    df = get_scaled_dataframe(form)
    # Reduce dimension if specified
    X = PcaUtil.reduce_dimension(df, n_components=2)
    return X
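# NOTE: get_scaled_dataframe, used throughout these handlers, is not defined
# in this module. A hypothetical sketch inferred from its call sites (a form
# with 'data_file_name' and 'column_header' fields, as in DataForm above):
def _get_scaled_dataframe_sketch(form):
    data_file_name = form.cleaned_data['data_file_name']
    header_idx = 0 if form.cleaned_data['column_header'] == "on" else None
    df = DataFrameUtil.convert_file_to_dataframe(
        fs.get_full_path(data_file_name), header=header_idx)
    return PreProcessingUtil.standardize(df)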
def get_reduced_dim_data(df_source, df_target, feature_indexes,
                         target_label_index, arr_target_filter_col,
                         arr_numtypes, arr_criterion, reduce_dim_algorithm,
                         n_components):
    """ Filter data by criterion and reduce dimensions for a 3D plot.
    reduce_dim_algorithm: Only PCA is implemented for this phase.
    """
    # Select only the chosen source columns in radiomics
    df_selected_source = Helper.get_selected_columns_data(
        df_source, feature_indexes)
    df_selected_target = Helper.get_selected_columns_data(
        df_target, arr_target_filter_col)
    # Use the length to split the result between source and label later
    len_selected_source = len(df_selected_source.columns)
    df_data = df_selected_source.join(df_selected_target)
    arr_criterion_columns = list(df_selected_target.columns)
    df_start_res = Helper.get_filtered_data(df_data, arr_numtypes,
                                            arr_criterion_columns,
                                            arr_criterion)
    X = df_start_res.iloc[:, 0:len_selected_source]
    y = df_start_res[[df_target.columns.values[int(target_label_index)]]]
    # Standardize data
    X_scaled = PreProcessingUtil.standardize(X)
    # When exactly 3 features are selected, skip PCA and directly return the
    # result from filtering and the standard scaler.
    if len_selected_source == 3:
        return X_scaled, y
    else:
        dim_3d = []
        pca_helper = PcaUtil()
        if reduce_dim_algorithm == PCA:
            # Get X transformed by PCA
            dim_3d, pca = pca_helper.reduce_dimension(
                X_scaled, n_components=n_components)
        elif reduce_dim_algorithm == LDA:
            new_y = None
            # LDA supports only one target, so encode only one target column
            col_y = y.columns.values
            label_type = y.loc[:, col_y[0]].dtype
            if label_type == 'object':
                encoder = EncodingCategoricalFeatures()
                new_y = encoder.label_encoder(y.loc[:, col_y[0]].values)
            elif label_type in [np.float64]:
                raise BizValidationExption(
                    "Target Label", "Data type cannot be a float number.")
            else:
                new_y = y
            if isinstance(new_y, pd.core.frame.DataFrame):
                new_y = y.values
            n_labels = len(np.unique(new_y))
            if n_labels <= 3:
                raise BizValidationExption(
                    "LDA",
                    "To reduce dimensions by LDA to 3 dimensions, the number "
                    "of classes must be greater than 3.")
            # Don't specify n_components in this PCA step because the result
            # would be different.
            X_transformed, pca = pca_helper.reduce_dimension(X_scaled)
            dim_3d = LdaUtil.reduce_dimension(X_transformed, new_y.ravel(),
                                              n_components=n_components)
        return dim_3d, y
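# NOTE: LdaUtil.reduce_dimension is assumed to wrap sklearn's
# LinearDiscriminantAnalysis. LDA yields at most (n_classes - 1) components,
# which is why the guard above demands more than 3 classes for a 3-D plot.
def _lda_reduce_sketch(X, y, n_components):
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    lda = LinearDiscriminantAnalysis(n_components=n_components)
    return lda.fit_transform(X, y)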
def process_data_handler(request):
    """ Get data for analysis and general information.
    Result format:
        plot: {original_data: {x: .., y: .., label: ...},
               new_data: {x: ..., y: ..., label: ...}}
        data_table: {table_columns: ..., table_data: ...}
        msg_info|msg_error|msg_success|msg_warning: ....
        data_tables: {table1: {table_columns: [.., ..], table_data: [[..]],
                               point_id: [...]},
                      table2: {...}}
    """
    form = VisInputForm(request.POST, request.FILES)
    resp_data = dict()
    plot = dict()
    data_tables = dict()
    if form.is_valid():
        data_file = form.cleaned_data["data_file"]
        label_file = form.cleaned_data["label_file"]
        add_data_file = form.cleaned_data["add_data_file"]
        predict_data_file = form.cleaned_data["new_data_file"]
        general_data_file = form.cleaned_data["general_data_file"]
        data_column_header = form.cleaned_data['data_column_header']
        add_data_column_header = form.cleaned_data['add_data_column_header']
        label_column_header = form.cleaned_data['label_column_header']
        new_data_column_header = form.cleaned_data['new_data_column_header']
        general_data_column_header = form.cleaned_data[
            'general_data_column_header']

        df_data = pd.DataFrame()          # Original data space
        df_label = pd.DataFrame()         # Labels of original data
        df_add_data = pd.DataFrame()      # Additional data for base space
        df_new_data = pd.DataFrame()      # New data to predict
        df_general_info = pd.DataFrame()  # General info

        # Check whether the data contains a table header, then read it
        # with/without the header row into a dataframe.
        data_column_header_idx = None
        if data_file:
            if data_column_header == "on":
                data_column_header_idx = 0
            df_data = DataFrameUtil.file_to_dataframe(
                data_file, header=data_column_header_idx)

        # Reduce dimensions for visualization
        X_scaled = PreProcessingUtil.fit_transform(df_data)
        X_ori2d, pca = PcaUtil.reduce_dimension(X_scaled, n_components=2)
        # Convert the result to a dataframe for plotting
        df_plot_original = pd.DataFrame(data=X_ori2d, columns=['x', 'y'])

        if label_file:
            label_column_header_idx = None
            if label_column_header == "on":
                label_column_header_idx = 0
            df_label = DataFrameUtil.file_to_dataframe(
                label_file, header=label_column_header_idx)

        # Process additional data for the data table
        df_add_data_id = pd.DataFrame()  # Unique IDs to add to data points
        if add_data_file:
            add_data_column_header_idx = None
            if add_data_column_header == "on":
                add_data_column_header_idx = 0
            df_add_data = DataFrameUtil.file_to_dataframe(
                add_data_file, header=add_data_column_header_idx)
            df_add_data_id = df_add_data.iloc[:, 0]

        # Join base space X, y ==> label, x coordinate, y coordinate
        df_plot_original['label'] = df_label
        # Optional: add a unique key to each data point
        if not df_add_data_id.empty:
            # Join id to get the format: point_id, label, x, y
            df_plot_original['point_id'] = df_add_data_id.values

        plot["original_data"] = df_plot_original.to_json()
        # For SlickGrid format
        plot["original_data_split"] = df_plot_original.to_json(
            orient='columns')
        # ===== End of processing original data for data points =====

        # If a new data file is uploaded, predict the data and add it to
        # the plot.
        df_plot_predict = pd.DataFrame()
        if predict_data_file:
            new_column_header_idx = None
            if new_data_column_header == "on":
                new_column_header_idx = 0
            df_new_data = DataFrameUtil.file_to_dataframe(
                predict_data_file, header=new_column_header_idx)
            # Process data with the pipeline of the selected algorithm
            X_new_scaled, y_predict = predict_new_data(df_new_data)
            X_new2d, new_pca = PcaUtil.reduce_dimension(X_new_scaled,
                                                        n_components=2)
            df_plot_predict = pd.DataFrame(data=X_new2d, columns=['x', 'y'])
            df_plot_predict['label'] = y_predict
            plot['new_data'] = df_plot_predict.to_json()

        # If additional info for the predicted data is uploaded, update
        # new_data with point_id to get the format: point_id, label, x, y
        df_predict_data_info = pd.DataFrame()
        if general_data_file:
            general_data_column_header_idx = None
            if general_data_column_header == "on":
                general_data_column_header_idx = 0
            df_predict_data_info = DataFrameUtil.file_to_dataframe(
                general_data_file, header=general_data_column_header_idx)
            # Optional: add a unique key to each data point
            df_plot_predict['point_id'] = df_predict_data_info.iloc[:, 0].values
            plot['new_data'] = df_plot_predict.to_json()
        # ===== End of processing predicted data =====

        if not df_predict_data_info.empty:
            # Append the general info of the new data to the base space
            # (pd.concat replaces the deprecated DataFrame.append).
            df_add_data = pd.concat([df_add_data, df_predict_data_info])

        # Prepare data for visualization
        resp_data['plot'] = plot

        # id column for SlickGrid (required)
        if not df_add_data_id.empty:
            df_data.insert(loc=0, column='id', value=df_add_data_id.values)
        else:
            df_data.insert(loc=0, column='id',
                           value=np.arange(0, df_data.shape[0]))
        data_tables['table1'] = {
            'table_data': df_data.to_json(orient='records'),
            'point_id': str(list(df_data['id'].values))}

        if not df_add_data.empty:
            # For SlickGrid use orient='records'; format: [{..}, {..}]
            df_add_data['id'] = df_add_data.iloc[:, 0].values
            # SlickGrid does not support column names with a dot like "f.eid"
            df_add_data.rename(columns={'f.eid': 'f:eid'}, inplace=True)
            data_tables['table2'] = {
                'table_data': df_add_data.to_json(orient='records'),
                'point_id': df_add_data.iloc[:, 0].to_json(orient='values')}
            # int64 is not JSON serializable, so cast to str
            resp_data['height_min'] = str(df_add_data['height'].min())
            resp_data['height_max'] = str(df_add_data['height'].max())
            resp_data['weight_min'] = str(df_add_data['weight'].min())
            resp_data['weight_max'] = str(df_add_data['weight'].max())
            resp_data['age_min'] = str(df_add_data['age'].min())
            resp_data['age_max'] = str(df_add_data['age'].max())
        resp_data['data_tables'] = data_tables
    else:
        resp_data[msg.ERROR] = escape(form._errors)
    return JsonResponse(resp_data)
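# NOTE: predict_new_data is not defined in this module. From its call site it
# must return (scaled_features, predicted_labels). A hypothetical sketch that
# reuses load_model() above; the "xx" model name is a placeholder, as in
# process():
def _predict_new_data_sketch(df_new_data):
    X_scaled = PreProcessingUtil.standardize(df_new_data)
    X_reduced = PcaUtil.reduce_dimension(X_scaled, n_components=50)
    model = load_model(model_name="xx")
    return X_scaled, model.predict(X_reduced)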
def supervised_learning_train_test_handler(request):
    resp_data = dict()
    process_log = []
    messages = []  # Renamed from "msg" to avoid shadowing the msg module
    resp_data['process_log'] = process_log
    resp_data['msg'] = messages
    form = SupervisedLearningTrainTestForm(request.GET)
    # When the form is valid, data from the screen is converted to Python
    # types and stored in cleaned_data.
    if form.is_valid():
        sel_algorithm = form.cleaned_data['sel_algorithm']
        sel_dim_reduction = form.cleaned_data['sel_dim_reduction']
        n_components = form.cleaned_data['n_components']
        dataset_file_name = form.cleaned_data['dataset_file_name']
        column_header = form.cleaned_data['column_header']
        label_file_name = form.cleaned_data['label_file_name']
        label_column_header = form.cleaned_data['label_column_header']
        test_size = form.cleaned_data['test_size']
        sel_test_method = form.cleaned_data['sel_test_method']
        n_folds = form.cleaned_data['n_folds']
        is_saved = form.cleaned_data['is_saved']
        model_file_name = form.cleaned_data['model_file_name']

        # Dataframe for storing the dataset from the file.
        df = None
        if fs.is_file_in_base_location(dataset_file_name) \
                and fs.is_file_in_base_location(label_file_name):
            # Get the data file and store it in a dataframe.
            data_file_path = fs.get_base_location() + dataset_file_name
            # Dataset column header checking
            column_header_idx = None
            if column_header == "on":
                column_header_idx = 0
            df = DataFrameUtil.convert_file_to_dataframe(
                data_file_path, header=column_header_idx)

            # Feature data
            X = None
            if sel_dim_reduction == "pca":
                logger.debug("Dimensionality Reduction by PCA...")
                pca_helper = PcaUtil()
                # Standardize the data, reduce dimensions and return as X.
                X_scaled = PreProcessingUtil.fit_transform(df)
                X = pca_helper.reduce_dimension(X_scaled, n_components)
                logger.debug("PCA Done")

            # Label data
            y = None
            label_file_path = fs.get_base_location() + label_file_name
            label_column_header_idx = None
            if label_column_header == "on":
                label_column_header_idx = 0
            # Use pandas to read the data, then flatten to a 1D array
            y = pd.read_csv(label_file_path,
                            header=label_column_header_idx).values.ravel()

            clf = None  # Model
            if sel_algorithm:
                logger.debug("Creating model by SVM...")
                # Select whether to create the SVM as one-vs-one or
                # one-vs-rest.
                clf = init_model_object(sel_algorithm)

            if sel_test_method:
                logger.debug("Starting Cross Validation...")
                if sel_test_method == "cv" and n_folds:
                    scores = cross_val_score(clf, X, y, cv=n_folds)
                    txt_accuracy = "%0.2f (+/- %0.2f)" % (scores.mean(),
                                                          scores.std() * 2)
                    logger.debug(txt_accuracy)
                    resp_data["scores"] = scores.tolist()
                    resp_data["accuracy_mean"] = scores.mean()
                    resp_data["params"] = clf.get_params(deep=True)
                else:
                    # Set random_state to get the same split on every run.
                    X_train, X_test, y_train, y_test = train_test_split(
                        X, y, test_size=test_size, random_state=42)

            if is_saved == 1 and model_file_name:
                clf.fit(X, y)
                logger.debug("Save model as %s", model_file_name)
                saved_model_file_name = ModelUtils.save_model(
                    clf, model_file_name)
                resp_data[msg.SUCCESS] = ("Model has been saved successfully "
                                          "as " + saved_model_file_name)
        else:
            # The dataset file was not found.
            messages.append("File name is not found in storage.")
    else:
        resp_data[msg.ERROR] = escape(form._errors)
    return JsonResponse(resp_data)
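# NOTE: init_model_object is not defined in this module. A hypothetical
# sketch consistent with the "svmovo"/"svmovr" branches of process_pipeline:
def _init_model_object_sketch(sel_algorithm):
    from sklearn import svm
    if sel_algorithm == "svmovo":
        return svm.SVC(gamma='scale', decision_function_shape='ovo')
    return svm.LinearSVC(max_iter=5000)  # one-vs-rest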
def pipeline_run_handler(request):
    resp_data = dict()
    form = PipelineForm(request.GET)
    # When the form is valid, data from the screen is converted to Python
    # types and stored in cleaned_data.
    if form.is_valid():
        str_pipeline = form.cleaned_data['pipeline']
        dataset_file_name = form.cleaned_data['dataset_file_name']
        column_header = form.cleaned_data['column_header']
        label_file_name = form.cleaned_data['label_file_name']
        label_column_header = form.cleaned_data['label_column_header']
        # Dimensionality reduction
        pca_n_components = form.cleaned_data['pca_n_components']
        kernel_pca_n_components = form.cleaned_data['kernel_pca_n_components']
        lda_n_components = form.cleaned_data['lda_n_components']
        tsne_n_components = form.cleaned_data['tsne_n_components']
        # Test
        test_size = form.cleaned_data['test_size']
        n_folds = form.cleaned_data['n_folds']
        # Save model
        save_as_name = form.cleaned_data['save_as_name']
        # Feature selection
        sfs_k_features = form.cleaned_data['sfs_k_features']
        sfs_k_neighbors = form.cleaned_data['sfs_k_neighbors']
        sfs_forward = form.cleaned_data['sfs_forward']
        sfs_floating = form.cleaned_data['sfs_floating']
        sfs_scoring = form.cleaned_data['sfs_scoring']
        sfs_cv = form.cleaned_data['sfs_cv']
        sfs_n_jobs = form.cleaned_data['sfs_n_jobs']
        select_k_best_n_k = form.cleaned_data['select_k_best_n_k']
        stratified_kfold_n_split = form.cleaned_data[
            'stratified_kfold_n_split']
        stratified_kfold_shuffle = form.cleaned_data[
            'stratified_kfold_shuffle']

        # Dataframe for storing the dataset from the file.
        df = pd.DataFrame()
        if fs.is_file_in_base_location(dataset_file_name):
            # Get the data file and store it in a dataframe.
            data_file_path = fs.get_base_location() + dataset_file_name
            # Dataset column header checking
            column_header_idx = None
            if column_header == "on":
                column_header_idx = 0
            df = DataFrameUtil.convert_file_to_dataframe(
                data_file_path, header=column_header_idx)

            # Feature data
            X = df
            # Label data: use pandas to read the data, then flatten to a
            # 1D array.
            y = None
            if fs.is_file_in_base_location(label_file_name):
                label_column_header_idx = None
                if label_column_header == "on":
                    label_column_header_idx = 0
                label_file_path = fs.get_base_location() + label_file_name
                y = pd.read_csv(label_file_path,
                                header=label_column_header_idx).values.ravel()

            # Process the pipeline
            arr_pipeline = str_pipeline.split(",")
            parameters = dict()
            parameters['n_folds'] = n_folds
            parameters['pca_n_components'] = pca_n_components
            parameters['kernel_pca_n_components'] = kernel_pca_n_components
            parameters['lda_n_components'] = lda_n_components
            parameters['tsne_n_components'] = tsne_n_components
            parameters['test_size'] = test_size
            parameters['select_k_best_n_k'] = select_k_best_n_k
            parameters['stratified_kfold_n_split'] = stratified_kfold_n_split
            parameters['stratified_kfold_shuffle'] = stratified_kfold_shuffle
            if sfs_k_features != "":
                # In case of feature selection, plot the result as a table.
                parameters['sfs_k_neighbors'] = sfs_k_neighbors
                parameters['sfs_k_features'] = sfs_k_features
                parameters['sfs_forward'] = sfs_forward
                parameters['sfs_floating'] = sfs_floating
                parameters['sfs_scoring'] = sfs_scoring
                parameters['sfs_cv'] = sfs_cv
                parameters['sfs_n_jobs'] = sfs_n_jobs
                parameters['feature_names'] = df.columns

            result, X, y, model = process_pipeline(arr_pipeline, X, y,
                                                   parameters)
            resp_data = result

            if save_as_name != "":
                # If the model is not fitted yet, fit it before saving.
                if not ModelUtils.is_fitted(model):
                    model.fit(X, y)
                save_as_name = ModelUtils.save_model(model, save_as_name)
                resp_data[msg.SUCCESS] = ("Model has been saved successfully "
                                          "as " + save_as_name)

            # Display a table that lists the features in order.
            if (isinstance(X, np.ndarray) and X.any()) \
                    or (isinstance(X, pd.DataFrame) and not X.empty):
                # Check the dimension of X
                nD = X.shape[1]
                if nD == 2:
                    df_plot = pd.DataFrame(data=X, columns=['x', 'y'])
                    df_plot['label'] = y
                    resp_data['plot_data'] = df_plot.to_json()
                    resp_data['dimension'] = 2
                elif nD == 3:
                    df_plot = pd.DataFrame(data=X, columns=['x', 'y', 'z'])
                    df_plot['label'] = y
                    resp_data['plot_data'] = df_plot.to_json()
                    resp_data['dimension'] = 3
                elif nD > 3:
                    # Default to 3D
                    pca_helper = PcaUtil()
                    X = pca_helper.reduce_dimension(X, n_components=3)
                    df_plot = pd.DataFrame(data=X, columns=['x', 'y', 'z'])
                    df_label = pd.DataFrame(data=y, columns=['label'])
                    df_plot = df_plot.join(df_label)
                    resp_data['plot_data'] = df_plot.to_json()
                    resp_data['dimension'] = 3
        else:
            # The dataset file was not found.
            resp_data[msg.ERROR] = "File name is not found in storage."
    else:
        resp_data[msg.ERROR] = escape(form._errors)
    return JsonResponse(resp_data, safe=False)
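# NOTE: ModelUtils.is_fitted is assumed to wrap sklearn's check_is_fitted
# (callable without naming specific attributes since scikit-learn 0.22):
def _is_fitted_sketch(model):
    from sklearn.exceptions import NotFittedError
    from sklearn.utils.validation import check_is_fitted
    try:
        check_is_fitted(model)
        return True
    except NotFittedError:
        return False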