Ejemplo n.º 1
0
    def UpdateROC(self):
        """Redraw the ROC canvas for the pipeline currently selected in the UI.

        Returns immediately (without redrawing) while the UI is not flagged
        ready, while any of the four selector combo boxes is empty, or while
        the feature-number spin box reads 0.

        Otherwise the case folder is ``self._root_folder`` joined with
        ``<normalizer>_<dimension reduction>_<feature selector>_<feature
        number>_<classifier>``.  For every ticked ROC check box the matching
        ``<prefix>_predict.npy`` / ``<prefix>_label.npy`` pair is loaded from
        that folder and all loaded curves are passed to ``DrawROCList`` on the
        canvas figure.  The canvas is redrawn even when nothing was loaded.

        The test curve is optional: it is skipped unless *both* of its files
        exist.  The other curves are loaded unconditionally, so a missing
        file propagates the error raised by ``np.load``.
        """
        if not self.__is_ui_ready:
            return

        selectors = (self.comboNormalizer,
                     self.comboDimensionReduction,
                     self.comboFeatureSelector,
                     self.comboClassifier)
        if any(combo.count() == 0 for combo in selectors) or \
                self.spinBoxFeatureNumber.value() == 0:
            return

        case_name = '_'.join([self.comboNormalizer.currentText(),
                              self.comboDimensionReduction.currentText(),
                              self.comboFeatureSelector.currentText(),
                              str(self.spinBoxFeatureNumber.value()),
                              self.comboClassifier.currentText()])
        case_folder = os.path.join(self._root_folder, case_name)

        # (check box, file prefix, legend name, files are optional)
        curves = (
            (self.checkROCCVTrain, 'train', 'CV Train', False),
            (self.checkROCCVValidation, 'val', 'CV Validation', False),
            (self.checkROCTrain, 'all_train', 'Train', False),
            (self.checkROCTest, 'test', 'Test', True),
        )

        pred_list, label_list, name_list = [], [], []
        for check_box, prefix, curve_name, optional in curves:
            if not check_box.isChecked():
                continue
            pred_path = os.path.join(case_folder, prefix + '_predict.npy')
            label_path = os.path.join(case_folder, prefix + '_label.npy')
            # Both files are needed to draw a curve; checking only the label
            # file would still fail on a missing prediction file.
            if optional and not (os.path.exists(pred_path)
                                 and os.path.exists(label_path)):
                continue
            pred_list.append(np.load(pred_path))
            label_list.append(np.load(label_path))
            name_list.append(curve_name)

        if pred_list:
            DrawROCList(pred_list,
                        label_list,
                        name_list=name_list,
                        is_show=False,
                        fig=self.canvasROC.getFigure())

        self.canvasROC.draw()
Ejemplo n.º 2
0
    def UpdateROC(self):
        """Reload the selected pipeline and redraw its ROC curves.

        Returns immediately while any of the four selector combo boxes is
        empty or while the feature-number spin box reads 0.

        Otherwise the case folder is ``self._root_folder`` joined with
        ``<normalizer>_<dimension reduction>_<feature selector>_<feature
        number>_<classifier>``.  ``pipeline_info.csv`` from that folder is
        loaded into ``self._current_pipeline``; a failure there is shown in a
        message box and logged, and the method still goes on to draw.

        For every ticked ROC check box the matching ``<prefix>_predict.npy`` /
        ``<prefix>_label.npy`` pair is loaded and all loaded curves are passed
        to ``DrawROCList`` on the canvas figure.  The test curve is skipped
        unless *both* of its files exist; the train and validation files are
        loaded unconditionally, so a missing file propagates the error raised
        by ``np.load``.
        """
        selectors = (self.comboNormalizer,
                     self.comboDimensionReduction,
                     self.comboFeatureSelector,
                     self.comboClassifier)
        if any(combo.count() == 0 for combo in selectors) or \
                self.spinBoxFeatureNumber.value() == 0:
            return

        case_name = '_'.join([self.comboNormalizer.currentText(),
                              self.comboDimensionReduction.currentText(),
                              self.comboFeatureSelector.currentText(),
                              str(self.spinBoxFeatureNumber.value()),
                              self.comboClassifier.currentText()])
        case_folder = os.path.join(self._root_folder, case_name)

        try:
            self._current_pipeline.LoadPipeline(
                os.path.join(case_folder, 'pipeline_info.csv'))
        except Exception as ex:
            # UI boundary: report the problem to the user instead of letting
            # it escape from the slot.
            QMessageBox.about(self, "Load Error", str(ex))
            self.logger.log('Load Pipeline Error, The reason is ' + str(ex))

        # (check box, file prefix, legend name, files are optional)
        curves = (
            (self.checkROCTrain, 'train', 'train', False),
            (self.checkROCValidation, 'val', 'validation', False),
            (self.checkROCTest, 'test', 'Test', True),
        )

        pred_list, label_list, name_list = [], [], []
        for check_box, prefix, curve_name, optional in curves:
            if not check_box.isChecked():
                continue
            pred_path = os.path.join(case_folder, prefix + '_predict.npy')
            label_path = os.path.join(case_folder, prefix + '_label.npy')
            # Both files are needed to draw a curve; checking only the label
            # file would still fail on a missing prediction file.
            if optional and not (os.path.exists(pred_path)
                                 and os.path.exists(label_path)):
                continue
            pred_list.append(np.load(pred_path))
            label_list.append(np.load(label_path))
            name_list.append(curve_name)

        if pred_list:
            DrawROCList(pred_list,
                        label_list,
                        name_list=name_list,
                        is_show=False,
                        fig=self.canvasROC.getFigure())

        self.canvasROC.draw()
Ejemplo n.º 3
0
    def UpdateROC(self):
        """Redraw the ROC canvas for the pipeline currently selected in the UI.

        Nothing happens while the UI is not flagged ready, while any selector
        combo box is empty, or while the feature-number spin box reads 0.
        Otherwise the store name is obtained from ``self._fae.GetStoreName``,
        the fourth entry of ``self._fae.SplitFolder(...)`` is used as the
        source folder, and ``__AddOneCurveInRoc`` is invoked once per ticked
        ROC check box with the shared prediction/label/name lists.  Whatever
        ends up in those lists is handed to ``DrawROCList`` and the canvas is
        redrawn.
        """
        if not self.__is_ui_ready:
            return

        combo_boxes = (self.comboNormalizer,
                       self.comboDimensionReduction,
                       self.comboFeatureSelector,
                       self.comboClassifier)
        if any(box.count() == 0 for box in combo_boxes):
            return
        if self.spinBoxFeatureNumber.value() == 0:
            return

        store_name = self._fae.GetStoreName(
            self.comboNormalizer.currentText(),
            self.comboDimensionReduction.currentText(),
            self.comboFeatureSelector.currentText(),
            str(self.spinBoxFeatureNumber.value()),
            self.comboClassifier.currentText())
        classifier_folder = self._fae.SplitFolder(store_name,
                                                  self._root_folder)[3]

        predictions, labels, legends = [], [], []
        # One entry per selectable curve, in drawing order.
        curve_switches = ((self.checkROCCVTrain, CV_TRAIN),
                          (self.checkROCCVValidation, CV_VAL),
                          (self.checkROCTrain, TRAIN),
                          (self.checkROCTest, TEST))
        for check_box, curve_key in curve_switches:
            if check_box.isChecked():
                self.__AddOneCurveInRoc(predictions, labels, legends,
                                        classifier_folder, curve_key)

        if predictions:
            DrawROCList(predictions,
                        labels,
                        name_list=legends,
                        is_show=False,
                        fig=self.canvasROC.getFigure())

        self.canvasROC.draw()
Ejemplo n.º 4
0
    def Run(self, data_container, test_data_container=DataContainer(), store_folder=''):
        """Cross-validate the classifier, refit it on all data and store the results.

        For every split produced by ``self.__cv.split`` the classifier is
        fitted on the training part and asked to predict both parts; the
        predictions and labels of all folds are concatenated and scored with
        ``EstimateMetirc``.  Afterwards the classifier is refitted on the
        complete ``data_container`` and, if the test container holds data,
        scored on it as well.

        Args:
            data_container: provides ``GetArray()`` and ``GetLabel()`` for the
                training data.
            test_data_container: optional independent test data.  Treated as
                absent when ``GetArray().size`` is 0.  The default instance is
                shared between calls; it is only read here, never modified.
            store_folder: when non-empty, the folder (created if missing,
                including parent folders) that receives the ``*_predict.npy``
                / ``*_label.npy`` arrays, ``cv_info.csv`` with the validation
                indices, the ``*_ROC.jpg`` figures, the saved classifier and
                the metrics written by ``self.SaveResult``.

        Returns:
            Tuple ``(train_metric, val_metric, test_metric)``.
            ``test_metric`` is an empty dict when there is no test data.
        """
        data = data_container.GetArray()
        label = data_container.GetLabel()

        train_pred_list, train_label_list = [], []
        val_pred_list, val_label_list = [], []
        val_index_store = []

        for train_index, val_index in self.__cv.split(data, label):
            val_index_store.extend(val_index)

            train_data, train_label = data[train_index, :], label[train_index]
            val_data, val_label = data[val_index, :], label[val_index]

            self.__classifier.SetData(train_data, train_label)
            self.__classifier.Fit()

            train_pred_list.extend(self.__classifier.Predict(train_data))
            train_label_list.extend(train_label)
            val_pred_list.extend(self.__classifier.Predict(val_data))
            val_label_list.extend(val_label)

        total_train_label = np.asarray(train_label_list, dtype=np.uint8)
        total_train_pred = np.asarray(train_pred_list, dtype=np.float32)
        train_metric = EstimateMetirc(total_train_pred, total_train_label, 'train')

        total_label = np.asarray(val_label_list, dtype=np.uint8)
        total_pred = np.asarray(val_pred_list, dtype=np.float32)
        val_metric = EstimateMetirc(total_pred, total_label, 'val')

        # Final model: refit on the whole training set.
        self.__classifier.SetDataContainer(data_container)
        self.__classifier.Fit()

        has_test = test_data_container.GetArray().size > 0
        test_metric = {}
        if has_test:
            test_data = test_data_container.GetArray()
            test_label = test_data_container.GetLabel()
            test_pred = self.__classifier.Predict(test_data)

            test_metric = EstimateMetirc(test_pred, test_label, 'test')

        if store_folder:
            # makedirs(exist_ok=True) avoids the check-then-create race and
            # also works when parent folders are missing.
            os.makedirs(store_folder, exist_ok=True)

            info = {}
            info.update(train_metric)
            info.update(val_metric)

            np.save(os.path.join(store_folder, 'train_predict.npy'), total_train_pred)
            np.save(os.path.join(store_folder, 'val_predict.npy'), total_pred)
            np.save(os.path.join(store_folder, 'train_label.npy'), total_train_label)
            np.save(os.path.join(store_folder, 'val_label.npy'), total_label)

            cv_info_path = os.path.join(store_folder, 'cv_info.csv')
            pd.DataFrame(data=val_index_store).to_csv(cv_info_path)

            DrawROCList(total_train_pred, total_train_label,
                        store_path=os.path.join(store_folder, 'train_ROC.jpg'), is_show=False)
            DrawROCList(total_pred, total_label,
                        store_path=os.path.join(store_folder, 'val_ROC.jpg'), is_show=False)

            if has_test:
                info.update(test_metric)
                np.save(os.path.join(store_folder, 'test_predict.npy'), test_pred)
                np.save(os.path.join(store_folder, 'test_label.npy'), test_label)
                DrawROCList(test_pred, test_label,
                            store_path=os.path.join(store_folder, 'test_ROC.jpg'), is_show=False)

            self.__classifier.Save(store_folder)
            self.SaveResult(info, store_folder)

        return train_metric, val_metric, test_metric
Ejemplo n.º 5
0
    def Run(self, training_data_container, pipeline, result_folder, store_folder, testing_data_container=DataContainer()):
        """Generate ``report.pdf`` (and ``ROC.jpg``) describing one trained pipeline.

        The report contains a data description, a method description gathered
        from the pipeline components' ``GetDescription()``, a fixed statistics
        paragraph, a result paragraph, a table of metrics, a table of features
        and the ROC figure.

        Args:
            training_data_container: training data; ``GetLabel()`` must hold
                exactly two distinct values.
            pipeline: the pipeline to describe.  Its stored results are read
                from ``os.path.join(result_folder, pipeline.GetStoreName())``
                (``result.csv``, the ``*_predict.npy`` / ``*_label.npy``
                files, and either a ``*coef.csv`` file or
                ``feature_select_info.csv``).
            result_folder: parent folder of the per-pipeline result folders.
            store_folder: folder that receives ``ROC.jpg`` and ``report.pdf``.
            testing_data_container: optional independent test data; ignored
                when ``IsEmpty()`` is true.  The default instance is shared
                between calls and only read here.

        Returns:
            ``False`` when the training labels are not binary (nothing is
            written in that case); otherwise ``None``.
        """
        if len(np.unique(training_data_container.GetLabel())) != 2:
            print('Only works for the 2-label classification')
            return False

        def count_by_class(container):
            # The largest label value is counted as the positive class.
            labels = container.GetLabel()
            positive = len(np.where(labels == np.max(labels))[0])
            return positive, len(labels) - positive

        has_test = not testing_data_container.IsEmpty()

        # Data Description
        positive_number, negative_number = count_by_class(training_data_container)
        data_description_text = "    "
        data_description_text += (
            "We selected {:d} cases as the training data set. {:d} of them were marked as positive and the left {:d} "
            "were marked as negative. "
        ).format(len(training_data_container.GetCaseName()), positive_number, negative_number)
        if has_test:
            positive_number, negative_number = count_by_class(testing_data_container)
            data_description_text += (
                "We also selected another {:d} cases as the independent testing data set ({:d}/{:d} = positive/negative). \n"
            ).format(len(testing_data_container.GetCaseName()), positive_number, negative_number)
        else:
            data_description_text += "Since the number of the samples were limited, there were no independent testing data. "

        # Method Description
        method_description_text = "    " + "".join([
            pipeline.GetNormalizer().GetDescription(),
            pipeline.GetDimensionReduction().GetDescription(),
            pipeline.GetFeatureSelector().GetDescription(),
            pipeline.GetClassifier().GetDescription(),
            pipeline.GetCrossValidatiaon().GetDescription(),
        ]) + "\n"

        statistic_description_text = "    The performance of the model was evaluated using receiver operating characteristic " \
                                     "(ROC) curve analysis. The area under the ROC curve (AUC) was calculated for quantification. " \
                                     "The accuracy, sensitivity, specificity, positive predictive value (PPV), and negative " \
                                     "predictive value (NPV) were also calculated at a cutoff value that maximum the " \
                                     "value of the Youden index. We also boosted estimation 1000 times and applied paired " \
                                     "t-test to give the 95% confidence interval. All above processes were implemented with " \
                                     "FeAture Explorer (FAE, v0.2.2, https://github.com/salan668/FAE) on Python (3.5.4, https://www.python.org/). \n"

        # Result Description
        result_folder = os.path.join(result_folder, pipeline.GetStoreName())
        result = pd.read_csv(os.path.join(result_folder, 'result.csv'), index_col=0)

        def metric(name):
            # First value of the row labelled *name* in result.csv.  Indexing
            # the element avoids converting a whole array to a scalar.
            return result.loc[name].values[0]

        selected_feature_number = pipeline.GetFeatureSelector().GetSelectedFeatureNumber()
        result_description_text = (
            "We found that the model based on {:d} features can get the highest AUC on the "
            "validation data set. The AUC and the accuracy could achieve {:.3f} and {:.3f}, respectively. "
        ).format(selected_feature_number, float(metric('val_auc')), float(metric('val_accuracy')))
        if has_test:
            result_description_text += (
                "In this point, "
                "The AUC and the accuracy of the model achieve {:.3f} and {:.3f} on testing data set. "
            ).format(float(metric('test_auc')), float(metric('test_accuracy')))
        result_description_text += (
            "The clinical statistics in the diagnosis and the selected features were shown in Table 1 and Table 2. "
            "The ROC curve was shown in Figure 1. \n")

        # ROC figure: train and validation always, test when available.
        curve_names = ['train', 'val'] + (['test'] if has_test else [])
        pred_list = [np.load(os.path.join(result_folder, name + '_predict.npy')) for name in curve_names]
        label_list = [np.load(os.path.join(result_folder, name + '_label.npy')) for name in curve_names]

        from FAE.Visualization.DrawROCList import DrawROCList
        roc_path = os.path.join(store_folder, 'ROC.jpg')
        DrawROCList(pred_list, label_list, name_list=curve_names,
                    store_path=roc_path, is_show=False)

        from reportlab.lib import colors
        table_style = (
            ('FONT', (0, 0), (-1, -1), 'Helvetica', 9),
            ('LINEABOVE', (0, 0), (-1, 0), 1, colors.black),
            ('LINEABOVE', (0, 1), (-1, 1), 1, colors.black),
            ('LINEBELOW', (0, -1), (-1, -1), 1, colors.black),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER')
        )

        # Table 1 reports the test metrics when test data exist, otherwise
        # the validation metrics.
        table_1_header = "Table 1. Clinical statistics in the diagnosis. "
        metric_prefix = 'test' if has_test else 'val'
        table_1_rows = (
            ('Accuracy', 'accuracy'),
            ('AUC', 'auc'),
            ('AUC 95% CIs', 'auc 95% CIs'),
            ('NPV', 'negative predictive value'),
            ('PPV', 'positive predictive value'),
            ('Sensitivity', 'sensitivity'),
            ('Specificity', 'specificity'),
        )
        table_1 = [['Statistics', 'Value']]
        for title, key in table_1_rows:
            table_1.append([title, str(metric(metric_prefix + '_' + key))])

        # Table 2 lists model coefficients when a coefficient file exists,
        # otherwise the ranked selected features.
        candidate_file = glob.glob(os.path.join(result_folder, '*coef.csv'))
        if candidate_file:
            coef = pd.read_csv(candidate_file[0], index_col=0, header=0)
            table_2_header = 'Table 2. The coefficients of features in the model. '
            table_2 = [['Features', 'Coef in model']]
            for feature_name, value in zip(coef.index, coef.iloc[:, 0]):
                table_2.append([str(feature_name), "{:.3f}".format(value)])
        else:
            # Stays empty when the file has no 'selected_feature' row.
            features = []
            with open(os.path.join(result_folder, 'feature_select_info.csv'), 'r', newline='') as file:
                for row in csv.reader(file):
                    # Blank lines come back as empty rows.
                    if row and row[0] == 'selected_feature':
                        features = row[1:]
            table_2_header = 'Table 2. The selected of features. '
            table_2 = [['Features', 'Rank']]
            for rank, feature_name in enumerate(features, start=1):
                table_2.append([feature_name, str(rank)])

        figure_title = "Figure 1. The ROC curve. "

        # Build PDF
        pdf = PDFDocument(os.path.join(store_folder, 'report.pdf'))
        pdf.init_report()
        pdf.h1("Materials and Methods")
        pdf.p(data_description_text)
        pdf.p(method_description_text)
        pdf.p(statistic_description_text)

        pdf.h1("Result")
        pdf.p(result_description_text)
        pdf.table_header(table_1_header)
        pdf.table(table_1, 130, style=table_style)
        pdf.table_header(table_2_header)
        pdf.table(table_2, 200, style=table_style)
        pdf.p("\n\n")
        pdf.image(roc_path)
        pdf.table_header(figure_title)

        pdf.end_connect("Thanks for using FAE v.0.2. If you need a specific report, please connect to Yang Song ([email protected]) or Guang Yang "
              "([email protected]). Welcome any co-operation and discussion. ")
        pdf.generate()
Ejemplo n.º 6
0
    def Run(self,
            data_container,
            test_data_container=DataContainer(),
            store_folder=''):
        """Cross-validate the classifier, refit it on all data and store the results.

        Every split from ``self.__cv.split`` is used to fit the classifier on
        the training part and predict both parts; the pooled predictions and
        labels are scored with ``EstimateMetirc``.  The classifier is then
        refitted on the full ``data_container``.  When the test container
        holds data it is first reduced to the feature names of
        ``data_container`` via ``FeatureSelector.SelectFeatureByName`` and
        then scored.

        Results are written only when ``store_folder`` is non-empty and is an
        existing directory: the ``*_predict.npy`` / ``*_label.npy`` arrays,
        the ``*_ROC.jpg`` figures, the saved classifier, and the metrics via
        ``self.SaveCVInfo``.

        Returns:
            Tuple ``(train_metric, val_metric, test_metric)``;
            ``test_metric`` is an empty dict when there is no test data.
        """
        samples = data_container.GetArray()
        targets = data_container.GetLabel()

        cv_train_pred, cv_train_label = [], []
        cv_val_pred, cv_val_label = [], []

        for fold_train_idx, fold_val_idx in self.__cv.split(samples, targets):
            fold_train_x = samples[fold_train_idx, :]
            fold_train_y = targets[fold_train_idx]
            fold_val_x = samples[fold_val_idx, :]
            fold_val_y = targets[fold_val_idx]

            self.__classifier.SetData(fold_train_x, fold_train_y)
            self.__classifier.Fit()

            fold_train_prob = self.__classifier.Predict(fold_train_x)
            fold_val_prob = self.__classifier.Predict(fold_val_x)

            cv_train_pred.extend(fold_train_prob)
            cv_train_label.extend(fold_train_y)
            cv_val_pred.extend(fold_val_prob)
            cv_val_label.extend(fold_val_y)

        pooled_train_label = np.asarray(cv_train_label, dtype=np.uint8)
        pooled_train_pred = np.asarray(cv_train_pred, dtype=np.float32)
        train_metric = EstimateMetirc(pooled_train_pred, pooled_train_label,
                                      'train')

        pooled_val_label = np.asarray(cv_val_label, dtype=np.uint8)
        pooled_val_pred = np.asarray(cv_val_pred, dtype=np.float32)
        val_metric = EstimateMetirc(pooled_val_pred, pooled_val_label, 'val')

        # Final model: refit on the whole training set.
        self.__classifier.SetDataContainer(data_container)
        self.__classifier.Fit()

        test_metric = {}
        if test_data_container.GetArray().size > 0:
            # Align the test features with those the model was trained on.
            test_data_container = FeatureSelector().SelectFeatureByName(
                test_data_container, data_container.GetFeatureName())

            test_label = test_data_container.GetLabel()
            test_pred = self.__classifier.Predict(
                test_data_container.GetArray())

            test_metric = EstimateMetirc(test_pred, test_label, 'test')

        if not (store_folder and os.path.isdir(store_folder)):
            return train_metric, val_metric, test_metric

        info = {}
        info.update(train_metric)
        info.update(val_metric)

        arrays_to_store = (('train_predict.npy', pooled_train_pred),
                           ('val_predict.npy', pooled_val_pred),
                           ('train_label.npy', pooled_train_label),
                           ('val_label.npy', pooled_val_label))
        for file_name, array in arrays_to_store:
            np.save(os.path.join(store_folder, file_name), array)

        DrawROCList(pooled_train_pred,
                    pooled_train_label,
                    store_path=os.path.join(store_folder, 'train_ROC.jpg'),
                    is_show=False)
        DrawROCList(pooled_val_pred,
                    pooled_val_label,
                    store_path=os.path.join(store_folder, 'val_ROC.jpg'),
                    is_show=False)

        # Re-evaluated on purpose: the container may have been replaced by
        # the feature-selected one above.
        if test_data_container.GetArray().size > 0:
            info.update(test_metric)
            np.save(os.path.join(store_folder, 'test_predict.npy'), test_pred)
            np.save(os.path.join(store_folder, 'test_label.npy'), test_label)
            DrawROCList(test_pred,
                        test_label,
                        store_path=os.path.join(store_folder, 'test_ROC.jpg'),
                        is_show=False)

        self.__classifier.Save(store_folder)

        self.SaveCVInfo(info, store_folder)

        return train_metric, val_metric, test_metric