예제 #1
0
def TestNewData(NewDataCsv, model_folder, result_save_path=''):
    '''
    Apply a previously trained model to a new feature matrix and report metrics.

    The training artefacts are obtained from LoadTrainInfo(model_folder); only
    its 'normalizer' and 'classifier' entries are used here.

    :param NewDataCsv: New radiomics feature matrix csv file path
    :param model_folder: The trained model path
    :param result_save_path: Optional output folder. When non-empty it is
        created if missing and receives test_predict.npy, test_label.npy and
        test_info.csv, plus whatever CrossValidation.SaveResult writes.
    :return: The metrics returned by EstimateMetirc(predict, label)
    '''
    train_info = LoadTrainInfo(model_folder)

    new_data_container = DataContainer()
    new_data_container.Load(NewDataCsv)

    # Normalization with the normalizer stored at training time.
    # NOTE(review): no feature selection happens here, so the columns of
    # NewDataCsv presumably have to match what the normalizer and the
    # classifier were trained on - confirm against the training pipeline.
    new_data_container = train_info['normalizer'].Transform(new_data_container)

    # Model
    classifier = train_info['classifier']
    classifier.SetDataContainer(new_data_container)
    model = classifier.GetModel()
    # Column 1 of predict_proba is used as the score (presumably the
    # positive class - confirm the label encoding).
    predict = model.predict_proba(new_data_container.GetArray())[:, 1]

    label = new_data_container.GetLabel()
    case_name = new_data_container.GetCaseName()

    metric = EstimateMetirc(predict, label)

    print(metric)
    print('\t')

    if result_save_path:
        # Create the folder up front so none of the writers below fails on a
        # missing directory.
        os.makedirs(result_save_path, exist_ok=True)

        info = {}
        info.update(metric)
        # The CrossValidation object is only needed for its SaveResult helper.
        CrossValidation().SaveResult(info, result_save_path)

        np.save(os.path.join(result_save_path, 'test_predict.npy'), predict)
        np.save(os.path.join(result_save_path, 'test_label.npy'), label)

        # One row per case: name, predicted score, true label.
        test_result_info = [['CaseName', 'Pred', 'Label']]
        test_result_info.extend(
            [name, pred, truth]
            for name, pred, truth in zip(case_name, predict, label))
        with open(os.path.join(result_save_path, 'test_info.csv'),
                  'w',
                  newline='') as csvfile:
            csv.writer(csvfile).writerows(test_result_info)

    return metric
예제 #2
0
    def Run(self, data_container, test_data_container=DataContainer(), store_folder=''):
        '''
        Cross-validate the classifier, refit it on all data, optionally test and store.

        For every (train, validation) split produced by the configured splitter
        the classifier is fitted on the training part and predicts both parts.
        The predictions of all folds are pooled before the metrics are
        estimated. Afterwards the classifier is fitted once more on the whole
        data_container; that final fit is used for the test data and is the
        one that gets saved.

        :param data_container: Container with the development data
            (GetArray / GetLabel are used)
        :param test_data_container: Optional test container; skipped when its
            array is empty. The default instance is created once at definition
            time and shared between calls; this method only calls
            GetArray / GetLabel on it.
        :param store_folder: Optional output folder; created (including
            missing parents) when it does not exist
        :return: (train_metric, val_metric, test_metric); test_metric is {}
            when there is no test data
        '''
        data = data_container.GetArray()
        label = data_container.GetLabel()

        train_pred_list, train_label_list = [], []
        val_pred_list, val_label_list = [], []
        # Validation row indices in the order the folds were visited.
        val_index_store = []

        for train_index, val_index in self.__cv.split(data, label):
            val_index_store.extend(val_index)

            train_data = data[train_index, :]
            train_label = label[train_index]
            val_data = data[val_index, :]
            val_label = label[val_index]

            self.__classifier.SetData(train_data, train_label)
            self.__classifier.Fit()

            train_pred_list.extend(self.__classifier.Predict(train_data))
            train_label_list.extend(train_label)
            val_pred_list.extend(self.__classifier.Predict(val_data))
            val_label_list.extend(val_label)

        total_train_label = np.asarray(train_label_list, dtype=np.uint8)
        total_train_pred = np.asarray(train_pred_list, dtype=np.float32)
        train_metric = EstimateMetirc(total_train_pred, total_train_label, 'train')

        total_label = np.asarray(val_label_list, dtype=np.uint8)
        total_pred = np.asarray(val_pred_list, dtype=np.float32)
        val_metric = EstimateMetirc(total_pred, total_label, 'val')

        # Final fit on the complete development set.
        self.__classifier.SetDataContainer(data_container)
        self.__classifier.Fit()

        # Decide once whether test data is present so the prediction step and
        # the storing step below can never disagree.
        test_data = test_data_container.GetArray()
        has_test = test_data.size > 0

        test_metric = {}
        if has_test:
            test_label = test_data_container.GetLabel()
            test_pred = self.__classifier.Predict(test_data)
            test_metric = EstimateMetirc(test_pred, test_label, 'test')

        if store_folder:
            # makedirs also handles nested paths and an already existing folder.
            os.makedirs(store_folder, exist_ok=True)

            info = {}
            info.update(train_metric)
            info.update(val_metric)

            np.save(os.path.join(store_folder, 'train_predict.npy'), total_train_pred)
            np.save(os.path.join(store_folder, 'val_predict.npy'), total_pred)
            np.save(os.path.join(store_folder, 'train_label.npy'), total_train_label)
            np.save(os.path.join(store_folder, 'val_label.npy'), total_label)

            cv_info_path = os.path.join(store_folder, 'cv_info.csv')
            pd.DataFrame(data=val_index_store).to_csv(cv_info_path)

            DrawROCList(total_train_pred, total_train_label,
                        store_path=os.path.join(store_folder, 'train_ROC.jpg'), is_show=False)
            DrawROCList(total_pred, total_label,
                        store_path=os.path.join(store_folder, 'val_ROC.jpg'), is_show=False)

            if has_test:
                info.update(test_metric)
                np.save(os.path.join(store_folder, 'test_predict.npy'), test_pred)
                np.save(os.path.join(store_folder, 'test_label.npy'), test_label)
                DrawROCList(test_pred, test_label,
                            store_path=os.path.join(store_folder, 'test_ROC.jpg'),
                            is_show=False)

            self.__classifier.Save(store_folder)
            self.SaveResult(info, store_folder)

        return train_metric, val_metric, test_metric
예제 #3
0
    def Run(self,
            data_container,
            test_data_container=DataContainer(),
            store_folder=''):
        '''
        Cross-validate the classifier, refit on all data and optionally evaluate a test set.

        Fold predictions are pooled over all splits before the train and
        validation metrics are estimated. The classifier is then fitted on the
        full array/label pair of data_container. When the test container holds
        data it is first reduced to the feature names of data_container and
        then predicted with that final fit.

        :param data_container: Container with the development data
        :param test_data_container: Optional test container; skipped when its array is empty
        :param store_folder: Output folder. Results are written only when this
            is non-empty AND already exists as a directory; it is not created here.
        :return: (train_metric, val_metric, test_metric); test_metric is {}
            when there is no test data
        '''
        features = data_container.GetArray()
        targets = data_container.GetLabel()

        pooled_train_pred, pooled_train_label = [], []
        pooled_val_pred, pooled_val_label = [], []

        for fit_rows, held_out_rows in self.__cv.split(features, targets):
            fit_x, fit_y = features[fit_rows, :], targets[fit_rows]
            held_x, held_y = features[held_out_rows, :], targets[held_out_rows]

            self.__classifier.SetData(fit_x, fit_y)
            self.__classifier.Fit()

            fit_scores = self.__classifier.Predict(fit_x)
            held_scores = self.__classifier.Predict(held_x)

            pooled_train_pred.extend(fit_scores)
            pooled_train_label.extend(fit_y)
            pooled_val_pred.extend(held_scores)
            pooled_val_label.extend(held_y)

        # Pooled training-fold results.
        train_truth = np.asarray(pooled_train_label, dtype=np.uint8)
        train_scores = np.asarray(pooled_train_pred, dtype=np.float32)
        train_metric = EstimateMetirc(train_scores, train_truth, 'train')

        # Pooled validation-fold results.
        val_truth = np.asarray(pooled_val_label, dtype=np.uint8)
        val_scores = np.asarray(pooled_val_pred, dtype=np.float32)
        val_metric = EstimateMetirc(val_scores, val_truth, 'val')

        # Final fit on everything.
        self.__classifier.SetData(features, targets)
        self.__classifier.Fit()

        test_metric = {}
        if test_data_container.GetArray().size > 0:
            # Restrict the test container to the features used for training.
            test_data_container = FeatureSelector().SelectFeatureByName(
                test_data_container, data_container.GetFeatureName())

            test_features = test_data_container.GetArray()
            test_truth = test_data_container.GetLabel()
            test_scores = self.__classifier.Predict(test_features)

            test_metric = EstimateMetirc(test_scores, test_truth, 'test')

        # Nothing is stored unless the folder is given and already exists.
        if not store_folder or not os.path.isdir(store_folder):
            return train_metric, val_metric, test_metric

        info = {}
        info.update(train_metric)
        info.update(val_metric)

        cv_arrays = (
            ('train_predict.npy', train_scores),
            ('val_predict.npy', val_scores),
            ('train_label.npy', train_truth),
            ('val_label.npy', val_truth),
        )
        for file_name, array in cv_arrays:
            np.save(os.path.join(store_folder, file_name), array)

        if test_data_container.GetArray().size > 0:
            info.update(test_metric)
            test_arrays = (
                ('test_predict.npy', test_scores),
                ('test_label.npy', test_truth),
            )
            for file_name, array in test_arrays:
                np.save(os.path.join(store_folder, file_name), array)

        self.__classifier.Save(store_folder)
        self.SaveCVInfo(info, store_folder)

        return train_metric, val_metric, test_metric
예제 #4
0
    def Run(self,
            data_container,
            test_data_container=DataContainer(),
            store_folder='',
            is_hyper_parameter=False):
        '''
        Cross-validate every classifier parameter set, keep the best one and refit.

        Each entry of self.classifier_parameter_list is cross-validated on its
        own; the fold predictions of one parameter set are pooled and turned
        into train / validation metrics. The parameter set with the highest
        validation AUC wins (np.argmax, so the first one on ties). The
        classifier is then rebuilt with that parameter set and fitted on the
        whole data_container; this final fit produces the 'all_train' metrics,
        the test predictions and the saved model.

        :param data_container: Container with the development data
            (GetArray / GetLabel / GetCaseName are used)
        :param test_data_container: Optional test container; skipped when its
            array is empty. The default instance is created once at definition
            time and shared between calls.
        :param store_folder: Optional output folder; created (including
            missing parents) when it does not exist
        :param is_hyper_parameter: When True and the parameter list currently
            holds exactly one entry, AutoLoadClassifierParameterList is called
            before the search
        :return: (train_cv_metric, val_cv_metric, test_metric, all_train_metric)
            for the selected parameter set; test_metric is {} without test data
        '''
        data = data_container.GetArray()
        label = data_container.GetLabel()
        case_name = data_container.GetCaseName()

        param_metric_train_auc = []
        param_metric_val_auc = []
        param_all = []

        if len(self.classifier_parameter_list) == 1 and is_hyper_parameter:
            # NOTE(review): the relative path uses a Windows-style backslash;
            # confirm how AutoLoadClassifierParameterList resolves it elsewhere.
            self.AutoLoadClassifierParameterList(
                relative_path=r'HyperParameters\Classifier')

        for parameter in self.classifier_parameter_list:
            self.SetDefaultClassifier()
            self.classifier.SetModelParameter(parameter)

            # Fresh accumulators for every parameter set. Sharing them across
            # the loop would mix predictions of earlier parameter sets into the
            # metrics of later ones and corrupt the model selection.
            train_pred_list, train_label_list = [], []
            val_pred_list, val_label_list = [], []
            train_cv_info = [['CaseName', 'Group', 'Pred', 'Label']]
            val_cv_info = [['CaseName', 'Group', 'Pred', 'Label']]

            # Fold numbers in the CSV start at 1.
            for group_index, (train_index, val_index) in enumerate(
                    self.__cv.split(data, label), start=1):
                train_data = data[train_index, :]
                train_label = label[train_index]
                val_data = data[val_index, :]
                val_label = label[val_index]

                self.classifier.SetData(train_data, train_label)
                self.classifier.Fit()

                train_prob = self.classifier.Predict(train_data)
                val_prob = self.classifier.Predict(val_data)

                group_name = str(group_index)
                train_cv_info.extend(
                    [case_name[row], group_name, prob, truth]
                    for row, prob, truth in zip(train_index, train_prob,
                                                train_label))
                val_cv_info.extend(
                    [case_name[row], group_name, prob, truth]
                    for row, prob, truth in zip(val_index, val_prob,
                                                val_label))

                train_pred_list.extend(train_prob)
                train_label_list.extend(train_label)
                val_pred_list.extend(val_prob)
                val_label_list.extend(val_label)

            total_train_label = np.asarray(train_label_list, dtype=np.uint8)
            total_train_pred = np.asarray(train_pred_list, dtype=np.float32)
            train_cv_metric = EstimateMetirc(total_train_pred,
                                             total_train_label, 'train')

            total_val_label = np.asarray(val_label_list, dtype=np.uint8)
            total_val_pred = np.asarray(val_pred_list, dtype=np.float32)
            val_cv_metric = EstimateMetirc(total_val_pred, total_val_label,
                                           'val')

            param_metric_train_auc.append(float(train_cv_metric['train_auc']))
            param_metric_val_auc.append(float(val_cv_metric['val_auc']))
            # The info lists are new objects for each parameter set and are not
            # touched afterwards, so they can be stored without copying.
            param_all.append({
                'total_train_label': total_train_label,
                'total_train_pred': total_train_pred,
                'train_metric': train_cv_metric,
                'train_cv_info': train_cv_info,
                'total_val_label': total_val_label,
                'total_val_pred': total_val_pred,
                'val_metric': val_cv_metric,
                'val_cv_info': val_cv_info
            })

        # Find the best parameter: highest pooled validation AUC.
        best_index = np.argmax(param_metric_val_auc)
        best = param_all[best_index]
        total_train_label = best['total_train_label']
        total_train_pred = best['total_train_pred']
        train_cv_metric = best['train_metric']
        train_cv_info = best['train_cv_info']
        total_val_label = best['total_val_label']
        total_val_pred = best['total_val_pred']
        val_cv_metric = best['val_metric']
        val_cv_info = best['val_cv_info']

        # Rebuild the classifier with the winning parameters and fit on all data.
        self.SetDefaultClassifier()
        self.classifier.SetModelParameter(
            self.classifier_parameter_list[best_index])
        self.classifier.SetDataContainer(data_container)
        self.classifier.Fit()

        all_train_pred = self.classifier.Predict(data_container.GetArray())
        all_train_label = data_container.GetLabel()
        all_train_metric = EstimateMetirc(all_train_pred, all_train_label,
                                          'all_train')

        # Decide once whether test data is present so the prediction step and
        # the storing step below can never disagree.
        test_data = test_data_container.GetArray()
        has_test = test_data.size > 0

        test_metric = {}
        if has_test:
            test_label = test_data_container.GetLabel()
            test_case_name = test_data_container.GetCaseName()
            test_pred = self.classifier.Predict(test_data)

            test_metric = EstimateMetirc(test_pred, test_label, 'test')

        if store_folder:
            # makedirs also handles nested paths and an already existing folder.
            os.makedirs(store_folder, exist_ok=True)

            # Save the per-parameter AUC table, unless the first parameter set
            # is the empty dict.
            if self.classifier_parameter_list[0] != {}:
                with open(os.path.join(store_folder,
                                       'Classifier_Param_Result.csv'),
                          'w',
                          newline='') as csvfile:
                    writer = csv.writer(csvfile)
                    writer.writerow(['Param', 'Train AUC', 'Val AUC'])
                    for param, train_auc, val_auc in zip(
                            self.classifier_parameter_list,
                            param_metric_train_auc, param_metric_val_auc):
                        writer.writerow([
                            self._GetNameOfParamDict(param), train_auc,
                            val_auc
                        ])

            info = {}
            info.update(train_cv_metric)
            info.update(val_cv_metric)
            info.update(all_train_metric)

            np.save(os.path.join(store_folder, 'train_predict.npy'),
                    total_train_pred)
            np.save(os.path.join(store_folder, 'train_label.npy'),
                    total_train_label)
            np.save(os.path.join(store_folder, 'val_predict.npy'),
                    total_val_pred)
            np.save(os.path.join(store_folder, 'val_label.npy'),
                    total_val_label)
            np.save(os.path.join(store_folder, 'all_train_predict.npy'),
                    all_train_pred)
            np.save(os.path.join(store_folder, 'all_train_label.npy'),
                    all_train_label)

            with open(os.path.join(store_folder, 'train_cv5_info.csv'),
                      'w',
                      newline='') as csvfile:
                csv.writer(csvfile).writerows(train_cv_info)
            with open(os.path.join(store_folder, 'val_cv5_info.csv'),
                      'w',
                      newline='') as csvfile:
                csv.writer(csvfile).writerows(val_cv_info)

            if has_test:
                info.update(test_metric)
                np.save(os.path.join(store_folder, 'test_predict.npy'),
                        test_pred)
                np.save(os.path.join(store_folder, 'test_label.npy'),
                        test_label)

                # One row per test case: name, predicted score, true label.
                test_result_info = [['CaseName', 'Pred', 'Label']]
                test_result_info.extend(
                    [name, pred, truth] for name, pred, truth in zip(
                        test_case_name, test_pred, test_label))
                with open(os.path.join(store_folder, 'test_info.csv'),
                          'w',
                          newline='') as csvfile:
                    csv.writer(csvfile).writerows(test_result_info)

            self.classifier.Save(store_folder)
            self.SaveResult(info, store_folder)

        return train_cv_metric, val_cv_metric, test_metric, all_train_metric
예제 #5
0
    def Run(self, data_container, test_data_container=DataContainer(), store_folder=''):
        '''
        Cross-validate the classifier with per-case bookkeeping, refit, optionally test/store.

        For every (train, validation) split produced by the configured splitter
        the classifier is fitted on the training part and predicts both parts.
        Each prediction is recorded together with its case name, and the
        predictions of all folds are pooled before the metrics are estimated.
        Afterwards the classifier is fitted on the whole data_container; that
        final fit is used for the test data and is the one that gets saved.

        :param data_container: Container with the development data
            (GetArray / GetLabel / GetCaseName are used)
        :param test_data_container: Optional test container; skipped when its
            array is empty. The default instance is created once at definition
            time and shared between calls.
        :param store_folder: Optional output folder; created (including
            missing parents) when it does not exist
        :return: (train_metric, val_metric, test_metric); test_metric is {}
            when there is no test data
        '''
        data = data_container.GetArray()
        label = data_container.GetLabel()
        case_name = data_container.GetCaseName()

        train_pred_list, train_label_list = [], []
        val_pred_list, val_label_list = [], []

        # Per-case rows (header first) written to CSV when storing results.
        train_cv_info = [['CaseName', 'Pred', 'Label']]
        val_cv_info = [['CaseName', 'Pred', 'Label']]

        for train_index, val_index in self.__cv.split(data, label):
            train_data = data[train_index, :]
            train_label = label[train_index]
            val_data = data[val_index, :]
            val_label = label[val_index]

            self.classifier.SetData(train_data, train_label)
            self.classifier.Fit()

            train_prob = self.classifier.Predict(train_data)
            val_prob = self.classifier.Predict(val_data)

            train_cv_info.extend(
                [case_name[row], prob, truth]
                for row, prob, truth in zip(train_index, train_prob, train_label))
            val_cv_info.extend(
                [case_name[row], prob, truth]
                for row, prob, truth in zip(val_index, val_prob, val_label))

            train_pred_list.extend(train_prob)
            train_label_list.extend(train_label)
            val_pred_list.extend(val_prob)
            val_label_list.extend(val_label)

        total_train_label = np.asarray(train_label_list, dtype=np.uint8)
        total_train_pred = np.asarray(train_pred_list, dtype=np.float32)
        train_metric = EstimateMetirc(total_train_pred, total_train_label, 'train')

        total_label = np.asarray(val_label_list, dtype=np.uint8)
        total_pred = np.asarray(val_pred_list, dtype=np.float32)
        val_metric = EstimateMetirc(total_pred, total_label, 'val')

        # Final fit on the complete development set.
        self.classifier.SetDataContainer(data_container)
        self.classifier.Fit()

        # Decide once whether test data is present so the prediction step and
        # the storing step below can never disagree.
        test_data = test_data_container.GetArray()
        has_test = test_data.size > 0

        test_metric = {}
        if has_test:
            test_label = test_data_container.GetLabel()
            test_case_name = test_data_container.GetCaseName()
            test_pred = self.classifier.Predict(test_data)

            test_metric = EstimateMetirc(test_pred, test_label, 'test')

        if store_folder:
            # makedirs also handles nested paths and an already existing folder.
            os.makedirs(store_folder, exist_ok=True)

            info = {}
            info.update(train_metric)
            info.update(val_metric)

            np.save(os.path.join(store_folder, 'train_predict.npy'), total_train_pred)
            np.save(os.path.join(store_folder, 'val_predict.npy'), total_pred)
            np.save(os.path.join(store_folder, 'train_label.npy'), total_train_label)
            np.save(os.path.join(store_folder, 'val_label.npy'), total_label)

            with open(os.path.join(store_folder, 'train_cvloo_info.csv'), 'w', newline='') as csvfile:
                csv.writer(csvfile).writerows(train_cv_info)
            with open(os.path.join(store_folder, 'val_cvloo_info.csv'), 'w', newline='') as csvfile:
                csv.writer(csvfile).writerows(val_cv_info)

            if has_test:
                info.update(test_metric)
                np.save(os.path.join(store_folder, 'test_predict.npy'), test_pred)
                np.save(os.path.join(store_folder, 'test_label.npy'), test_label)

                # One row per test case: name, predicted score, true label.
                test_result_info = [['CaseName', 'Pred', 'Label']]
                test_result_info.extend(
                    [name, pred, truth]
                    for name, pred, truth in zip(test_case_name, test_pred, test_label))
                with open(os.path.join(store_folder, 'test_info.csv'), 'w', newline='') as csvfile:
                    csv.writer(csvfile).writerows(test_result_info)

            self.classifier.Save(store_folder)
            self.SaveResult(info, store_folder)

        return train_metric, val_metric, test_metric