# Print the current working directory.
print(os.getcwd())

# Column headers of the (initially empty) result table.
# NOTE(review): 'accuracy' appears twice below, which gives the DataFrame two
# identically named columns -- confirm whether that is intended.
column_list = [
    'sample_number', 'positive_number', 'negative_number',
    'auc', 'auc 95% CIs', 'accuracy',
    'feature_number', 'Yorden Index', 'accuracy',
    'sensitivity', 'specificity',
    'positive predictive value', 'negative predictive value',
]
df = pd.DataFrame(columns=column_list)

# Set Feature Selector List
# The three pipelines share the same first two steps and differ only in the
# final selector (ANOVA, Relief, RFE -- in that order).
feature_selector_list = [
    FeatureSelectPipeline([
        RemoveSameFeatures(),
        RemoveCosSimilarityFeatures(),
        FeatureSelectByANOVA(),
    ]),
    FeatureSelectPipeline([
        RemoveSameFeatures(),
        RemoveCosSimilarityFeatures(),
        FeatureSelectByRelief(),
    ]),
    FeatureSelectPipeline([
        RemoveSameFeatures(),
        RemoveCosSimilarityFeatures(),
        FeatureSelectByRFE(),
    ]),
]
def CheckAndSave(self):
    """Check the loaded data and, if it passes, write it out.

    The checks run in this order, and the first one that fails shows a
    warning box and stops the method:

    1. the data container is empty;
    2. the label is not binary (the table cursor is moved to the row
       reported by ``FindNonValidLabelIndex``, column 0);
    3. the data contains a non-valid number (the table cursor is moved to
       the cell reported by ``FindNonValidNumberIndex``, column shifted
       by one).

    When all checks pass, ``RemoveSameFeatures`` is applied to the data
    container, a balancing object is chosen from the radio buttons, and then
    either

    * (``checkSeparate`` ticked) the user picks a folder, the data is split
      by percentage or by the testing reference, and the balancer is run on
      the training part with ``store_path`` set to that folder; or
    * the user picks a CSV file name and the balancer is run on the whole
      data container with ``store_path`` set to that file.

    Returns:
        None in every case.
    """
    # --- validation: bail out on the first problem -------------------------
    if self.data_container.IsEmpty():
        QMessageBox.warning(self, "Warning", "There is no data",
                            QMessageBox.Ok)
        return

    if not self.data_container.IsBinaryLabel():
        QMessageBox.warning(self, "Warning", "There are not 2 Labels",
                            QMessageBox.Ok)
        bad_row = self.data_container.FindNonValidLabelIndex()
        # The edit triggers are switched to CurrentChanged only while the
        # cursor is moved, then restored (presumably so that the offending
        # cell opens for editing -- confirm).
        saved_triggers = self.tableFeature.editTriggers()
        self.tableFeature.setEditTriggers(QAbstractItemView.CurrentChanged)
        self.tableFeature.setCurrentCell(bad_row, 0)
        self.tableFeature.setEditTriggers(saved_triggers)
        return

    if self.data_container.HasNonValidNumber():
        QMessageBox.warning(self, "Warning", "There are nan items",
                            QMessageBox.Ok)
        bad_cell = self.data_container.FindNonValidNumberIndex()
        saved_triggers = self.tableFeature.editTriggers()
        self.tableFeature.setEditTriggers(QAbstractItemView.CurrentChanged)
        # Column index is offset by one relative to the reported index.
        self.tableFeature.setCurrentCell(bad_cell[0], bad_cell[1] + 1)
        self.tableFeature.setEditTriggers(saved_triggers)
        return

    # --- data is valid: clean it up and pick the balancing strategy --------
    self.data_container = RemoveSameFeatures().Run(self.data_container)

    balancer = DataBalance()
    if self.radioDownSampling.isChecked():
        balancer = DownSampling()
    elif self.radioUpSampling.isChecked():
        balancer = UpSampling()
    elif self.radioSmote.isChecked():
        balancer = SmoteSampling()

    # --- no split requested: store everything under one file name ----------
    if not self.checkSeparate.isChecked():
        file_name, _ = QFileDialog.getSaveFileName(
            self, "Save data", filter="csv files (*.csv)")
        if file_name != '':
            balancer.Run(self.data_container, store_path=file_name)
        return

    # --- split requested: store into a folder ------------------------------
    folder_name = QFileDialog.getExistingDirectory(self, "Save data")
    if folder_name == '':
        # Dialog returned an empty folder name; nothing to do.
        return

    splitter = DataSeparate.DataSeparate()
    try:
        if self.__testing_ref_data_container.IsEmpty():
            # No testing reference loaded: split by the spin-box percentage.
            test_percentage = self.spinBoxSeparate.value()
            split_result = splitter.RunByTestingPercentage(
                self.data_container, test_percentage, folder_name)
        else:
            split_result = splitter.RunByTestingReference(
                self.data_container,
                self.__testing_ref_data_container,
                folder_name)
        training_data_container, _ = split_result

        if training_data_container.IsEmpty():
            # NOTE(review): the wording "does not mismatch" looks inverted;
            # left untouched because it is user-facing text.
            QMessageBox.information(
                self, 'Error',
                'The testing data does not mismatch, please check the '
                'testing data really exists in current data')
            return None

        balancer.Run(training_data_container, store_path=folder_name)
    except Exception as e:
        content = 'PrepareConnection, splitting failed: '
        detail = str(e)
        self.logger.error('{}{}'.format(content, detail))
        QMessageBox.about(self, content, detail)
def CheckAndSave(self):
    """Check the loaded data and, if it passes, save or split it.

    The checks run in this order; the first failing one shows a warning box
    and ends the method:

    1. the data container is empty;
    2. the label is not binary (the table cursor is moved to the row
       reported by ``FindInvalidLabelIndex``, column 0);
    3. the data contains an invalid number (the table cursor is moved to
       the cell reported by ``FindInvalidNumberIndex``, column shifted by
       one).

    When all checks pass, ``RemoveSameFeatures`` is applied to the data
    container. Then:

    * if either split radio button is checked, the user picks a folder and
      the data is split into that folder -- by the spin-box percentage
      (passing the clinical reference when it is non-empty) or by the
      testing reference container; afterwards the folder is opened in
      Windows Explorer;
    * otherwise the user picks a CSV file name and the data container is
      saved to it.

    Any exception raised while splitting is logged and shown in a message
    box instead of propagating.

    Returns:
        None in every case.
    """
    # Local import: only this method needs it.
    import subprocess

    def _focus_cell(row, column):
        # The edit triggers are switched to CurrentChanged only while the
        # cursor is moved, then restored (presumably so that the offending
        # cell opens for editing -- confirm).
        saved_triggers = self.tableFeature.editTriggers()
        self.tableFeature.setEditTriggers(QAbstractItemView.CurrentChanged)
        self.tableFeature.setCurrentCell(row, column)
        self.tableFeature.setEditTriggers(saved_triggers)

    # --- validation: bail out on the first problem -------------------------
    if self.data_container.IsEmpty():
        QMessageBox.warning(self, "Warning", "There is no data",
                            QMessageBox.Ok)
        return None

    if not self.data_container.IsBinaryLabel():
        QMessageBox.warning(self, "Warning", "There are not 2 Labels",
                            QMessageBox.Ok)
        invalid_row = self.data_container.FindInvalidLabelIndex()
        _focus_cell(invalid_row, 0)
        return None

    if self.data_container.HasInvalidNumber():
        QMessageBox.warning(self, "Warning", "There are nan items",
                            QMessageBox.Ok)
        invalid_cell = self.data_container.FindInvalidNumberIndex()
        # Column index is offset by one relative to the reported index.
        _focus_cell(invalid_cell[0], invalid_cell[1] + 1)
        return None

    # --- data is valid ------------------------------------------------------
    remove_features_with_same_value = RemoveSameFeatures()
    self.data_container = remove_features_with_same_value.Run(
        self.data_container)

    split_requested = (self.radioSplitRandom.isChecked()
                       or self.radioSplitRef.isChecked())
    if not split_requested:
        file_name, _ = QFileDialog.getSaveFileName(
            self, "Save data", filter="csv files (*.csv)")
        if file_name:
            self.data_container.Save(file_name)
        return None

    folder_name = QFileDialog.getExistingDirectory(self, "Save data")
    if folder_name == '':
        # Dialog returned an empty folder name; nothing to do.
        return None

    data_separate = DataSeparate.DataSeparate()
    try:
        if self.__testing_ref_data_container.IsEmpty():
            # No testing reference loaded: split by the spin-box percentage.
            testing_data_percentage = self.spinBoxSeparate.value()
            if self.__clinical_ref.size == 0:
                training_data_container, _ = \
                    data_separate.RunByTestingPercentage(
                        self.data_container,
                        testing_data_percentage,
                        store_folder=folder_name)
            else:
                training_data_container, _ = \
                    data_separate.RunByTestingPercentage(
                        self.data_container,
                        testing_data_percentage,
                        clinic_df=self.__clinical_ref,
                        store_folder=folder_name)
        else:
            training_data_container, _ = \
                data_separate.RunByTestingReference(
                    self.data_container,
                    self.__testing_ref_data_container,
                    folder_name)

        if training_data_container.IsEmpty():
            # NOTE(review): the wording "does not mismatch" looks inverted;
            # left untouched because it is user-facing text.
            QMessageBox.information(
                self, 'Error',
                'The testing data does not mismatch, please check the '
                'testing data really exists in current data')
            return None

        # Open the output folder in Windows Explorer. The path is passed as
        # its own argument rather than formatted into a shell command line,
        # so folders containing spaces or shell metacharacters are handled
        # correctly. check=False: the exit status is ignored, as it was with
        # the previous os.system call.
        try:
            subprocess.run(
                ['explorer.exe', os.path.normpath(folder_name)],
                check=False)
        except OSError as open_error:
            # Best effort only: explorer.exe is unavailable (e.g. not on
            # Windows). The split itself succeeded, so do not report it as a
            # splitting failure.
            eclog(self._filename).GetLogger().error(
                'Open folder failed: ' + str(open_error))
    except Exception as e:
        content = 'PrepareConnection, splitting failed: '
        eclog(self._filename).GetLogger().error(
            'Split Error: ' + str(e))
        QMessageBox.about(self, content, str(e))
    return None