def evaluate(self, data_frame, array_datas, classifier_algo,
             pre_evaluate_results=None):
    """
    Display cross-validation results that were computed beforehand.

    Args:
        data_frame: Optional data-frame result. Takes precedence over
            ``array_datas`` when both are given.
        array_datas: Optional array results, used when ``data_frame`` is
            None.
        classifier_algo: Not used by this method.
        pre_evaluate_results: Two-element list ``[cv_output, aux_output]``.
            ``aux_output.data`` must unpack to ``(properties, model_data)``
            and ``cv_output.data`` must have at least three entries.
            NOTE(review): this matches what the ``preEvaluate`` method next
            to this one returns on success - confirm they belong to the
            same class.

    Returns:
        The result built by ``VizContainer.createResult`` with ``data`` set
        to ``[win, properties, model_data, self.processkFoldCV]``, or a
        ``ResultObject`` carrying ``CommandStatus.Error`` when the
        pre-evaluation results or the input data are missing.
    """
    result_object = ResultObject(None, None, None, CommandStatus.Error)

    # Anything other than a two-element list means pre-evaluation did not
    # succeed; checking the length avoids a ValueError in the unpack below.
    if (not isinstance(pre_evaluate_results, list)
            or len(pre_evaluate_results) != 2):
        Printer.Print("Pre evaluation results failed! Attach bug report!")
        return result_object

    # Validate the inputs before creating a window so that the error path
    # does not leave an unused window behind.
    source = data_frame if data_frame is not None else array_datas
    if source is None:
        Printer.Print("Provide one of data frame or array datas")
        return result_object

    win = Window.window()
    result_object = VizContainer.createResult(win, source, ['cval'])

    cv_output, aux_output = pre_evaluate_results
    properties, model_data = aux_output.data
    result_object.data = [win, properties, model_data, self.processkFoldCV]

    self.printkValueMessage(cv_output.data[0])
    self.updateWindow(win, cv_output.data[1], cv_output.data[2],
                      model_data[1], properties["title"])
    self.modify_figure.evaluate(result_object)
    return result_object
def preEvaluate(self, data_frame, array_datas, classifier_algo):
    """
    Prepare the data and run cross-validation for a classifier.

    Args:
        data_frame: Optional data-frame result; its ``data`` and ``name``
            are used when it is not None.
        array_datas: Optional array results, converted with
            ``DataGuru.transformArray_to_dataFrame`` when ``data_frame`` is
            None.
        classifier_algo: Result whose ``data[0]`` is the classifier model.

    Returns:
        On success, a list ``[ResultObject(cv_output, None),
        ResultObject(aux_output, None)]`` where ``cv_output`` is whatever
        ``self.performCV`` returns and ``aux_output`` is
        ``(properties, [X, Y, model])``.
        On failure, a single ``ResultObject`` with ``CommandStatus.Error``
        whose ``data`` holds a message describing the problem.
    """
    result_object = ResultObject(None, None, None, CommandStatus.Error)
    sns.set(color_codes=True)

    # Get the data frame
    if data_frame is not None:
        df = data_frame.data
        cname = data_frame.name
    elif array_datas is not None:
        command_status, df, _, cname = DataGuru.transformArray_to_dataFrame(
            array_datas)
        if command_status == CommandStatus.Error:
            print("Error in getting dataframe!")
            result_object.data = "Error in getting dataframe!"
            return result_object
    else:
        result_object.data = "Please provide data frame or arrays to analyze"
        return result_object

    # Get the ground truth array
    if StatContainer.ground_truth is None:
        # The first literal ends with a space so the two halves do not run
        # together ("by typing", not "bytyping").
        result_object.data = (
            "Please set a feature vector to ground truth by "
            "typing set ground truth before using this command")
        return result_object

    df = DataGuru.removeGT(df, StatContainer.ground_truth)
    Y = StatContainer.ground_truth.data

    # Remove nans:
    df, Y = DataGuru.removenan(df, Y)

    # Get the classifier model
    model = classifier_algo.data[0]

    # Standardize the feature matrix before handing it to the classifier.
    X = df.values
    scaler = preprocessing.StandardScaler().fit(X)
    X = scaler.transform(X)

    properties = self.createDefaultProperties()
    properties['title'] = cname

    cv_output = self.performCV(properties, X, Y, model)
    aux_output = (properties, [X, Y, model])
    return [ResultObject(cv_output, None), ResultObject(aux_output, None)]
def evaluate(self, array_datas):
    """
    Create a bar plot of the given arrays grouped by the ground truth.

    The arrays are combined into one data frame, grouped by the ground-truth
    column, and the per-group mean and standard deviation are handed to
    ``self.updateFigure``. When no ground truth is set, or its length does
    not match the data, a constant column of ones named ``'ground_truth'``
    is used instead. A numeric ground truth that ``StatContainer.isCategorical``
    does not recognise is binned into 10 intervals with ``pd.cut``.

    Args:
        array_datas: Array results to plot.

    Returns:
        The result built by ``VizContainer.createResult`` with ``data`` set
        to ``[win, properties, [df_mean, df_errors], self.updateFigure]``,
        or a ``ResultObject`` carrying ``CommandStatus.Error`` when the
        arrays cannot be converted or the ground truth cannot be treated as
        categorical.
    """
    result_object = ResultObject(None, None, None, CommandStatus.Error)
    sns.set(color_codes=True)

    command_status, df, _, cname = DataGuru.transformArray_to_dataFrame(
        array_datas)
    if command_status == CommandStatus.Error:
        return result_object

    n_rows = df.shape[0]
    if StatContainer.ground_truth is None:
        gtVals = np.ones(n_rows)
        ground_truth = 'ground_truth'
    else:
        gtVals = StatContainer.filterGroundTruth()
        ground_truth = StatContainer.ground_truth.name

    if len(gtVals) != n_rows:
        print("ground truth does not match with df shape")
        print(len(gtVals), n_rows)
        gtVals = np.ones(n_rows)
        ground_truth = 'ground_truth'

    # Remove nans: attach the ground truth first so rows stay aligned.
    df[ground_truth] = gtVals
    df.dropna(inplace=True)
    gtVals = df[ground_truth]

    uniqVals = StatContainer.isCategorical(gtVals)
    binned_ground_truth = False
    if uniqVals is None and np.issubdtype(gtVals.dtype, np.number):
        # Convert to categorical
        df[ground_truth] = pd.cut(gtVals, 10)
        binned_ground_truth = True

    if not binned_ground_truth and uniqVals is None:
        Printer.Print("Ground truth could not be mapped to",
                      "categorical array\n")
        Printer.Print("Please clear or select appropriate ground truth")
        return result_object

    has_string_labels = (uniqVals is not None
                         and isinstance(uniqVals[0], str))

    gb = df.groupby(ground_truth)
    df_mean = gb.mean()
    df_errors = gb.std()
    if has_string_labels:
        truncated_uniqVals, _ = StatContainer.removeCommonNames(
            df_mean.index)
        df_mean.index = truncated_uniqVals
        df_errors.index = truncated_uniqVals

    # Number of uniq_vals x number of arrs
    n_groups, n_arrays = df_mean.shape
    if not binned_ground_truth and n_arrays >= n_groups:
        df_mean = df_mean.T
        df_errors = df_errors.T

    properties = self.createDefaultProperties()
    properties['title'] = cname

    if has_string_labels:
        max_len = max(len(uniqVal) for uniqVal in uniqVals)
    else:
        max_len = 0

    if (binned_ground_truth
            or (uniqVals is not None and len(uniqVals) > 5
                and max_len > 8)):
        properties["horizontal"] = True

    if binned_ground_truth:
        properties["overwrite_labels"] = True
        # The placeholder column can be the one that got binned, in which
        # case no ground truth is set; fall back to the column name rather
        # than dereferencing None.
        if StatContainer.ground_truth is not None:
            properties["ylabel"] = StatContainer.ground_truth.name
        else:
            properties["ylabel"] = ground_truth

    win = Window.window()
    result_object = VizContainer.createResult(win, array_datas, ['bar'])
    result_object.data = [
        win, properties, [df_mean, df_errors], self.updateFigure
    ]
    self.updateFigure(result_object.data)
    self.modify_figure.evaluate(result_object)
    return result_object