Ejemplo n.º 1
0
class PrepareConnection(QWidget, Ui_Prepare):
    def __init__(self, parent=None):
        super(PrepareConnection, self).__init__(parent)
        self.setupUi(self)
        self.data_container = DataContainer()

        self.buttonLoad.clicked.connect(self.LoadData)
        self.buttonRemove.clicked.connect(self.RemoveNonValidValue)
        self.checkSeparate.clicked.connect(self.SetSeparateStatus)
        self.spinBoxSeparate.setEnabled(False)

        self.buttonSave.clicked.connect(self.CheckAndSave)

    def UpdateTable(self):
        if self.data_container.GetArray().size == 0:
            return

        self.tableFeature.setRowCount(len(self.data_container.GetCaseName()))
        header_name = deepcopy(self.data_container.GetFeatureName())
        header_name.insert(0, 'Label')
        self.tableFeature.setColumnCount(len(header_name))
        self.tableFeature.setHorizontalHeaderLabels(header_name)
        self.tableFeature.setVerticalHeaderLabels(
            list(map(str, self.data_container.GetCaseName())))

        for row_index in range(len(self.data_container.GetCaseName())):
            for col_index in range(len(header_name)):
                if col_index == 0:
                    self.tableFeature.setItem(
                        row_index, col_index,
                        QTableWidgetItem(
                            str(self.data_container.GetLabel()[row_index])))
                else:
                    self.tableFeature.setItem(
                        row_index, col_index,
                        QTableWidgetItem(
                            str(self.data_container.GetArray()[row_index,
                                                               col_index -
                                                               1])))

        text = "The number of cases: {:d}\n".format(
            len(self.data_container.GetCaseName()))
        text += "The number of features: {:d}\n".format(
            len(self.data_container.GetFeatureName()))
        if len(np.unique(self.data_container.GetLabel())) == 2:
            positive_number = len(
                np.where(self.data_container.GetLabel() == np.max(
                    self.data_container.GetLabel()))[0])
            negative_number = len(
                self.data_container.GetLabel()) - positive_number
            assert (positive_number + negative_number == len(
                self.data_container.GetLabel()))
            text += "The number of positive samples: {:d}\n".format(
                positive_number)
            text += "The number of negative samples: {:d}\n".format(
                negative_number)
        self.textInformation.setText(text)

    def LoadData(self):
        dlg = QFileDialog()
        file_name, _ = dlg.getOpenFileName(self,
                                           'Open SCV file',
                                           filter="csv files (*.csv)")
        try:
            self.data_container.Load(file_name)
        except:
            print('Error')

        self.UpdateTable()

    def RemoveNonValidValue(self):
        if self.radioRemoveNonvalidCases.isChecked():
            self.data_container.RemoveUneffectiveCases()
        elif self.radioRemoveNonvalidFeatures.isChecked():
            self.data_container.RemoveUneffectiveFeatures()

        self.UpdateTable()

    def SetSeparateStatus(self):
        if self.checkSeparate.isChecked():
            self.spinBoxSeparate.setEnabled(True)
        else:
            self.spinBoxSeparate.setEnabled(False)

    def CheckAndSave(self):
        if self.data_container.IsEmpty():
            QMessageBox.warning(self, "Warning", "There is no data",
                                QMessageBox.Ok)
        elif self.data_container.HasNonValidNumber():
            QMessageBox.warning(self, "Warning", "There are nan items",
                                QMessageBox.Ok)
            non_valid_number_Index = self.data_container.FindNonValidNumberIndex(
            )
            old_edit_triggers = self.tableFeature.editTriggers()
            self.tableFeature.setEditTriggers(QAbstractItemView.CurrentChanged)
            self.tableFeature.setCurrentCell(non_valid_number_Index[0],
                                             non_valid_number_Index[1] + 1)
            self.tableFeature.setEditTriggers(old_edit_triggers)
        elif self.checkSeparate.isChecked():
            percentage_testing_data = self.spinBoxSeparate.value()
            folder_name = QFileDialog.getExistingDirectory(self, "Save data")
            if folder_name != '':
                data_seperate = DataSeparate.DataSeparate(
                    percentage_testing_data)
                data_seperate.Run(self.data_container, folder_name)
        else:
            file_name, _ = QFileDialog.getSaveFileName(
                self, "Save data", filter="csv files (*.csv)")
            if file_name != '':
                self.data_container.Save(file_name)
Ejemplo n.º 2
0
class PrepareConnection(QWidget, Ui_Prepare):
    def __init__(self, parent=None):
        super(PrepareConnection, self).__init__(parent)
        self.setupUi(self)
        self.data_container = DataContainer()

        self.buttonLoad.clicked.connect(self.LoadData)
        self.buttonRemove.clicked.connect(self.RemoveNonValidValue)
        self.loadTestingReference.clicked.connect(
            self.LoadTestingReferenceDataContainer)
        self.clearTestingReference.clicked.connect(
            self.ClearTestingReferenceDataContainer)
        self.__testing_ref_data_container = DataContainer()
        self.checkSeparate.clicked.connect(self.SetSeparateStatus)

        self.spinBoxSeparate.setEnabled(False)
        self.logger = eclog(os.path.split(__file__)[-1]).GetLogger()

        self.loadTestingReference.setEnabled(False)
        self.clearTestingReference.setEnabled(False)

        self.buttonSave.clicked.connect(self.CheckAndSave)

    def UpdateTable(self):
        if self.data_container.GetArray().size == 0:
            return

        self.tableFeature.setRowCount(len(self.data_container.GetCaseName()))
        header_name = deepcopy(self.data_container.GetFeatureName())
        header_name.insert(0, 'Label')
        self.tableFeature.setColumnCount(len(header_name))
        self.tableFeature.setHorizontalHeaderLabels(header_name)
        self.tableFeature.setVerticalHeaderLabels(
            list(map(str, self.data_container.GetCaseName())))

        for row_index in range(len(self.data_container.GetCaseName())):
            for col_index in range(len(header_name)):
                if col_index == 0:
                    self.tableFeature.setItem(
                        row_index, col_index,
                        QTableWidgetItem(
                            str(self.data_container.GetLabel()[row_index])))
                else:
                    self.tableFeature.setItem(
                        row_index, col_index,
                        QTableWidgetItem(
                            str(self.data_container.GetArray()[row_index,
                                                               col_index -
                                                               1])))

        text = "The number of cases: {:d}\n".format(
            len(self.data_container.GetCaseName()))
        text += "The number of features: {:d}\n".format(
            len(self.data_container.GetFeatureName()))
        if len(np.unique(self.data_container.GetLabel())) == 2:
            positive_number = len(
                np.where(self.data_container.GetLabel() == np.max(
                    self.data_container.GetLabel()))[0])
            negative_number = len(
                self.data_container.GetLabel()) - positive_number
            assert (positive_number + negative_number == len(
                self.data_container.GetLabel()))
            text += "The number of positive samples: {:d}\n".format(
                positive_number)
            text += "The number of negative samples: {:d}\n".format(
                negative_number)
        self.textInformation.setText(text)

    def LoadData(self):
        dlg = QFileDialog()
        file_name, _ = dlg.getOpenFileName(self,
                                           'Open SCV file',
                                           filter="csv files (*.csv)")
        try:
            self.data_container.Load(file_name)
            self.logger.info('Open the file ' + file_name + ' Succeed.')
        except OSError as reason:
            self.logger.log('Open SCV file Error, The reason is ' +
                            str(reason))
            QMessageBox.about(self, 'Load data Error', reason.__str__())
            print('Error!' + str(reason))
        except ValueError:
            self.logger.error('Open SCV file ' + file_name +
                              ' Failed. because of value error.')
            QMessageBox.information(self, 'Error',
                                    'The selected data file mismatch.')
        self.UpdateTable()

        self.buttonRemove.setEnabled(True)
        self.buttonSave.setEnabled(True)

    def LoadTestingReferenceDataContainer(self):
        dlg = QFileDialog()
        file_name, _ = dlg.getOpenFileName(self,
                                           'Open SCV file',
                                           filter="csv files (*.csv)")
        try:
            self.__testing_ref_data_container.Load(file_name)
            self.loadTestingReference.setEnabled(False)
            self.clearTestingReference.setEnabled(True)
            self.spinBoxSeparate.setEnabled(False)
        except OSError as reason:
            self.logger.log('Load Testing Reference Error: ' + str(reason))
            print('Error!' + str(reason))
        except ValueError:
            self.logger.error('Open SCV file ' + file_name +
                              ' Failed. because of value error.')
            QMessageBox.information(self, 'Error',
                                    'The selected data file mismatch.')

    def ClearTestingReferenceDataContainer(self):
        del self.__testing_ref_data_container
        self.__testing_ref_data_container = DataContainer()
        self.loadTestingReference.setEnabled(True)
        self.clearTestingReference.setEnabled(False)
        self.spinBoxSeparate.setEnabled(False)

    def RemoveNonValidValue(self):
        if self.radioRemoveNonvalidCases.isChecked():
            self.data_container.RemoveUneffectiveCases()
        elif self.radioRemoveNonvalidFeatures.isChecked():
            self.data_container.RemoveUneffectiveFeatures()

        self.UpdateTable()

    def SetSeparateStatus(self):
        if self.checkSeparate.isChecked():
            self.spinBoxSeparate.setEnabled(True)
            self.loadTestingReference.setEnabled(True)
            self.clearTestingReference.setEnabled(False)
        else:
            self.spinBoxSeparate.setEnabled(False)
            self.loadTestingReference.setEnabled(False)
            self.clearTestingReference.setEnabled(False)

    def CheckAndSave(self):
        if self.data_container.IsEmpty():
            QMessageBox.warning(self, "Warning", "There is no data",
                                QMessageBox.Ok)
        elif not self.data_container.IsBinaryLabel():
            QMessageBox.warning(self, "Warning", "There are not 2 Labels",
                                QMessageBox.Ok)
            non_valid_number_Index = self.data_container.FindNonValidLabelIndex(
            )
            old_edit_triggers = self.tableFeature.editTriggers()
            self.tableFeature.setEditTriggers(QAbstractItemView.CurrentChanged)
            self.tableFeature.setCurrentCell(non_valid_number_Index, 0)
            self.tableFeature.setEditTriggers(old_edit_triggers)
        elif self.data_container.HasNonValidNumber():
            QMessageBox.warning(self, "Warning", "There are nan items",
                                QMessageBox.Ok)
            non_valid_number_Index = self.data_container.FindNonValidNumberIndex(
            )
            old_edit_triggers = self.tableFeature.editTriggers()
            self.tableFeature.setEditTriggers(QAbstractItemView.CurrentChanged)
            self.tableFeature.setCurrentCell(non_valid_number_Index[0],
                                             non_valid_number_Index[1] + 1)
            self.tableFeature.setEditTriggers(old_edit_triggers)
        else:
            remove_features_with_same_value = RemoveSameFeatures()
            self.data_container = remove_features_with_same_value.Run(
                self.data_container)

            data_balance = DataBalance()
            if self.radioDownSampling.isChecked():
                data_balance = DownSampling()
            elif self.radioUpSampling.isChecked():
                data_balance = UpSampling()
            elif self.radioSmote.isChecked():
                data_balance = SmoteSampling()

            if self.checkSeparate.isChecked():
                folder_name = QFileDialog.getExistingDirectory(
                    self, "Save data")
                if folder_name != '':
                    data_separate = DataSeparate.DataSeparate()
                    try:
                        if self.__testing_ref_data_container.IsEmpty():
                            testing_data_percentage = self.spinBoxSeparate.value(
                            )
                            training_data_container, _, = data_separate.RunByTestingPercentage(
                                self.data_container, testing_data_percentage,
                                folder_name)
                        else:
                            training_data_container, _, = data_separate.RunByTestingReference(
                                self.data_container,
                                self.__testing_ref_data_container, folder_name)
                            if training_data_container.IsEmpty():
                                QMessageBox.information(
                                    self, 'Error',
                                    'The testing data does not mismatch, please check the testing data '
                                    'really exists in current data')
                                return None
                        data_balance.Run(training_data_container,
                                         store_path=folder_name)
                    except Exception as e:
                        content = 'PrepareConnection, splitting failed: '
                        self.logger.error('{}{}'.format(content, str(e)))
                        QMessageBox.about(self, content, e.__str__())

            else:
                file_name, _ = QFileDialog.getSaveFileName(
                    self, "Save data", filter="csv files (*.csv)")
                if file_name != '':
                    data_balance.Run(self.data_container, store_path=file_name)