def evaluate(self, data_frame, array_datas):
    """
    Prepare a dataset for the Isomap view and hand it to the figure updater.

    The samples come from ``data_frame`` when it is given, otherwise they
    are assembled from ``array_datas``. Rows with missing values are
    removed, the features are standardized, and the result is passed to
    ``self.updateFigure`` (which presumably computes and draws the
    embedding -- it is not visible from here).

    Returns a ``ResultObject`` carrying ``CommandStatus.Error`` when no
    input is supplied or the arrays cannot be converted; otherwise the
    object produced by ``VizContainer.createResult`` with its ``data``
    set to ``[window, properties, [X, Y], self.updateFigure]``.
    """
    # Nothing to analyze: report it and bail out early
    if data_frame is None and array_datas is None:
        Printer.Print("Please provide data frame or arrays to analyze")
        return ResultObject(None, None, None, CommandStatus.Error)

    # Resolve the sample table and the name used as the figure title
    if data_frame is not None:
        table = DataGuru.convertStrCols_toNumeric(data_frame.data)
        title = data_frame.name
    else:
        status, table, _, title = DataGuru.transformArray_to_dataFrame(
            array_datas, useCategorical=True)
        if status == CommandStatus.Error:
            return ResultObject(None, None, None, CommandStatus.Error)

    # Remove rows with missing values; when a ground truth is registered,
    # the labels are filtered together with the samples
    labels = None
    if StatContainer.ground_truth is None:
        table.dropna(inplace=True)
    else:
        table = DataGuru.removeGT(table, StatContainer.ground_truth)
        labels = StatContainer.filterGroundTruth()
        table, labels = DataGuru.removenan(table, labels)

    # Standardize the features with a scaler fitted on the same data
    raw = table.values
    features = preprocessing.StandardScaler().fit(raw).transform(raw)

    win = Window.window()
    properties = self.createDefaultProperties()
    properties['title'] = title

    # The result is registered against whichever input was supplied
    source = data_frame if data_frame is not None else array_datas
    result_object = VizContainer.createResult(win, source, ['ismp'])
    result_object.data = [win, properties, [features, labels],
                          self.updateFigure]
    self.updateFigure(result_object.data)
    self.modify_figure.evaluate(result_object)
    return result_object
def evaluate(self, data_frame, array_datas):
    """
    Run PCA on a dataset and plot its first two principal components.

    The samples come from ``data_frame`` when it is given, otherwise they
    are assembled from ``array_datas``. Rows with missing values are
    removed, the features are standardized, and a two-component PCA
    projection is drawn as a scatter plot in a new window. When a ground
    truth is registered in ``StatContainer``, the points are colored by it
    and a colorbar labeled with the ground-truth name is added.

    Returns a ``ResultObject`` carrying ``CommandStatus.Error`` when no
    input is supplied or the arrays cannot be converted; otherwise the
    object produced by ``VizContainer.createResult`` for the window.
    """
    # Get the data frame
    if data_frame is not None:
        df = DataGuru.convertStrCols_toNumeric(data_frame.data)
        cname = data_frame.name
    elif array_datas is not None:
        command_status, df, _, cname = DataGuru.transformArray_to_dataFrame(
            array_datas, useCategorical=True)
        if command_status == CommandStatus.Error:
            return ResultObject(None, None, None, CommandStatus.Error)
    else:
        Printer.Print("Please provide data frame or arrays to analyze")
        return ResultObject(None, None, None, CommandStatus.Error)

    # Remove rows with missing values
    Y = None
    if StatContainer.ground_truth is not None:
        df = DataGuru.removeGT(df, StatContainer.ground_truth)
        Y = StatContainer.filterGroundTruth()
        df, Y = DataGuru.removenan(df, Y)
    else:
        # Non-mutating drop: df may still be the caller's own frame, and an
        # in-place drop would silently delete rows from it.
        df = df.dropna()

    # Standardize the features, then project onto two components
    X = df.values
    X = preprocessing.StandardScaler().fit(X).transform(X)
    pca_res = PCA(n_components=2).fit_transform(X)

    win = Window.window()
    f = win.gcf()
    ax = f.add_subplot(111)
    if Y is None:
        # No labels to map to colors, so no colormap is passed either
        # (matplotlib ignores cmap without c and warns about it).
        ax.scatter(pca_res[:, 0], pca_res[:, 1],
                   edgecolor="None", alpha=0.35)
    else:
        sc = ax.scatter(pca_res[:, 0], pca_res[:, 1], c=Y, cmap="jet",
                        edgecolor="None", alpha=0.35)
        # Attach the colorbar to this window's figure and axes explicitly;
        # plt.colorbar would use pyplot's current figure, which is not
        # guaranteed to be the one owned by this window.
        cbar = f.colorbar(sc, ax=ax)
        cbar.ax.get_yaxis().labelpad = 15
        cbar.ax.set_ylabel(StatContainer.ground_truth.name, rotation=270)
    ax.set_title(cname)
    win.show()

    # The result is registered against whichever input was supplied
    if data_frame is not None:
        return VizContainer.createResult(win, data_frame, ['pca'])
    return VizContainer.createResult(win, array_datas, ['pca'])
def evaluate(self, data_frame, array_datas, target):
    """
    Cluster a dataset and, if a ground truth exists, compare against it.

    The samples come from ``data_frame`` when it is given, otherwise they
    are assembled from ``array_datas``. Rows with missing values are
    removed, the features are standardized, and ``self.performOperation``
    is asked for cluster assignments. The number of clusters is the first
    number found in ``target.data`` when that is at least 1, otherwise 2.

    When a ground truth is registered in ``StatContainer``, a heatmap of
    the ground-truth-by-cluster contingency table is shown in a new window
    and returned as an additional result.

    Returns a single ``ResultObject`` carrying ``CommandStatus.Error``
    when no input is supplied or the arrays cannot be converted. On
    success returns a list: the optional figure result followed by an
    array result holding the cluster assignments.
    """
    # Get the data frame
    if data_frame is not None:
        df = data_frame.data
        cname = data_frame.name
    elif array_datas is not None:
        command_status, df, _, cname = DataGuru.transformArray_to_dataFrame(
            array_datas, useCategorical=True)
        if command_status == CommandStatus.Error:
            return ResultObject(None, None, None, CommandStatus.Error)
    else:
        Printer.Print("Please provide data frame or arrays to analyze")
        return ResultObject(None, None, None, CommandStatus.Error)

    # Remove rows with missing values
    Y = None
    if StatContainer.ground_truth is not None:
        df = DataGuru.removeGT(df, StatContainer.ground_truth)
        Y = StatContainer.filterGroundTruth()
        df, Y = DataGuru.removenan(df, Y)
    else:
        # Non-mutating drop: df can be the caller's own data_frame.data
        # here, and an in-place drop would delete rows from it.
        df = df.dropna()

    # Standardize the features
    X = df.values
    X = preprocessing.StandardScaler().fit(X).transform(X)

    # Pick the requested cluster count; fall back to 2 when none is given
    # or when the request truncates to less than one cluster
    num_clusters = 2
    numbers = findNumbers(target.data, 1)
    if numbers and numbers[0].data > 0:
        requested = int(numbers[0].data)
        if requested > 0:
            num_clusters = requested

    kY = self.performOperation(X, num_clusters)

    result_objects = []
    if StatContainer.ground_truth is not None:
        df_res = pd.DataFrame()
        df_res['ground_truth'] = Y
        df_res['clustering_result'] = kY
        # Count samples per (ground truth, cluster) pair. The table must be
        # kept and plotted; the raw per-sample frame is not a comparison.
        confusion = pd.crosstab(df_res['ground_truth'],
                                df_res['clustering_result'])

        win = Window.window()
        f = win.gcf()
        ax = f.add_subplot(111)
        # Counts are already numeric, so no string-column conversion is
        # needed before drawing.
        sns.heatmap(confusion, ax=ax)
        win.show()

        # The figure is registered against whichever input was supplied
        source = data_frame if data_frame is not None else array_datas
        result_objects.append(
            VizContainer.createResult(win, source, ['clstr.fig']))

    result_object = ResultObject(kY, [], DataType.array,
                                 CommandStatus.Success)
    result_object.createName(cname, command_name="clstr",
                             set_keyword_list=True)
    result_objects.append(result_object)
    return result_objects