Beispiel #1
0
    def __init__(self, cv_method='5-folder'):
        """Create the object with a default classifier and a CV splitter.

        ``cv_method`` selects the splitter: ``'LOO'`` gives ``LeaveOneOut()``,
        ``'10-folder'`` gives ``StratifiedKFold(10)`` and ``'5-folder'`` gives
        ``StratifiedKFold(5)``. Any other value leaves the splitter as
        ``None``.
        """
        self.__classifier = Classifier()

        # Ordered (name, factory) pairs; the factories are lazy so only the
        # splitter that was actually requested gets constructed.
        splitter_table = (
            ('LOO', lambda: LeaveOneOut()),
            ('10-folder', lambda: StratifiedKFold(10)),
            ('5-folder', lambda: StratifiedKFold(5)),
        )
        self.__cv = next(
            (build() for name, build in splitter_table if cv_method == name),
            None,
        )
Beispiel #2
0
def LoadTrainInfo(model_folder):
    """Load the stored training artefacts found in ``model_folder``.

    Three things are collected:

    * ``'normalizer'``: a ``Normalizer``. If a file whose name contains
      ``'_normalization_training.csv'`` exists in the folder, its content is
      loaded through ``Normalizer.LoadInfo``; otherwise a message is printed
      and the freshly constructed normalizer is returned as is. When several
      files match, the last one reported by ``os.listdir`` is used.
    * ``'selected_features'``: the values following the ``selected_feature``
      tag in ``feature_select_info.csv`` (an empty list, plus a printed
      message, if no such row exists).
    * ``'classifier'``: a ``Classifier`` on which ``Load(model_folder)`` has
      been called.

    Args:
        model_folder: Directory that holds the stored model files.

    Returns:
        dict with the keys ``'normalizer'``, ``'selected_features'`` and
        ``'classifier'``.

    Raises:
        OSError: If ``model_folder`` cannot be listed or
            ``feature_select_info.csv`` cannot be opened.
    """
    train_info = {}

    # --- Normalization -----------------------------------------------------
    normalizer = Normalizer()
    normalization_path = ''
    for sub_file in os.listdir(model_folder):
        if '_normalization_training.csv' in sub_file:
            normalization_path = os.path.join(model_folder, sub_file)

    # An empty path never exists, so "no match" also lands in the first branch.
    if not os.path.exists(normalization_path):
        print('Check the normalization name : zero_center_normalization')
    else:
        normalizer.LoadInfo(normalization_path)

    train_info['normalizer'] = normalizer

    # --- Selected features -------------------------------------------------
    selected_feature_path = os.path.join(model_folder,
                                         'feature_select_info.csv')
    selected_feature_list = []
    with open(selected_feature_path, 'r', newline='') as f:
        for row in csv.reader(f):
            # csv.reader yields [] for blank lines; skip them instead of
            # failing with IndexError on row[0].
            if row and row[0] == 'selected_feature':
                selected_feature_list = row[1:]
    if not selected_feature_list:
        print('No selected features')

    train_info['selected_features'] = selected_feature_list

    # --- Classifier (FAE model) --------------------------------------------
    classifier = Classifier()
    classifier.Load(model_folder)
    train_info['classifier'] = classifier

    return train_info
Beispiel #3
0
class CrossValidation:
    """Run cross validation for a classifier and collect its metrics.

    CrossValidation is the base class used to explore hyper-parameters. It
    supports leave-one-out (``'LOO'``), ``'10-folder'`` and ``'5-folder'``
    splitting. A classifier must be set before running the cross validation.
    A training metric and a validation metric are returned; if a non-empty
    testing data container is given as well, a test metric is returned too.
    """

    def __init__(self, cv_method='5-folder'):
        """Create the object with a default classifier.

        Args:
            cv_method: ``'LOO'``, ``'10-folder'`` or ``'5-folder'``. Any other
                value leaves the splitter unset (``None``); ``Run`` cannot be
                used until ``SetCV`` installs a valid splitter.
        """
        self.__classifier = Classifier()
        self.__cv = self._CreateSplitter(cv_method)

    @staticmethod
    def _CreateSplitter(cv):
        """Return the splitter named by ``cv``, or None for an unknown name."""
        if cv == 'LOO':
            return LeaveOneOut()
        if cv == '10-folder':
            return StratifiedKFold(10)
        if cv == '5-folder':
            return StratifiedKFold(5)
        return None

    def SetClassifier(self, classifier):
        """Replace the classifier used by ``Run``."""
        self.__classifier = classifier

    def GetClassifier(self):
        """Return the classifier currently in use."""
        return self.__classifier

    def SetCV(self, cv):
        """Select the splitter by name; unknown names keep the current one."""
        splitter = self._CreateSplitter(cv)
        if splitter is not None:
            self.__cv = splitter

    def GetCV(self):
        """Return the current splitter (``None`` if none was selected)."""
        return self.__cv

    def SaveResult(self, info, store_path):
        """Write ``info`` as CSV, one row per key, rows ordered by key.

        Each row starts with the key. A number or string value is written as
        a single cell; any other value is treated as an iterable and spread
        over the following cells.

        Args:
            info: Mapping from metric name to a scalar or an iterable.
            store_path: Target file, or an existing directory in which
                ``result.csv`` is created.
        """
        rows = []
        # Keys are unique, so ordering the keys fully determines the row order.
        for key in sorted(info):
            value = info[key]
            if isinstance(value, (numbers.Number, str)):
                rows.append([key, value])
            else:
                rows.append([key, *value])

        if os.path.isdir(store_path):
            store_path = os.path.join(store_path, 'result.csv')

        with open(store_path, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            writer.writerows(rows)

    def Run(self, data_container, test_data_container=None, store_folder=''):
        """Cross validate the classifier, refit it on all data and report.

        For every split the classifier is fitted on the training part and
        asked to predict both parts. The predictions of all splits are
        concatenated before the ``'train'`` and ``'val'`` metrics are
        estimated, so a sample contributes to the training predictions once
        for every split in which it was used for fitting. Afterwards the
        classifier is fitted on the complete ``data_container`` and, if test
        data is available, evaluated on it.

        Args:
            data_container: Container providing ``GetArray()`` (indexed as
                ``data[index, :]``) and ``GetLabel()``.
            test_data_container: Optional container with test cases. ``None``
                or a container whose array is empty disables the test step.
            store_folder: If not empty, the folder (created when missing,
                including parent folders) receiving the predictions and
                labels as ``.npy`` files, ``cv_info.csv`` with the validation
                indices, the ROC images, the files written by the
                classifier's ``Save`` and ``result.csv``.

        Returns:
            Tuple ``(train_metric, val_metric, test_metric)`` as produced by
            ``EstimateMetirc``; ``test_metric`` is an empty dict when no test
            data was evaluated.
        """
        train_pred_list, train_label_list, val_pred_list, val_label_list = [], [], [], []

        data = data_container.GetArray()
        label = data_container.GetLabel()
        val_index_store = []

        for train_index, val_index in self.__cv.split(data, label):
            val_index_store.extend(val_index)

            train_data = data[train_index, :]
            train_label = label[train_index]
            val_data = data[val_index, :]
            val_label = label[val_index]

            self.__classifier.SetData(train_data, train_label)
            self.__classifier.Fit()

            train_pred_list.extend(self.__classifier.Predict(train_data))
            train_label_list.extend(train_label)
            val_pred_list.extend(self.__classifier.Predict(val_data))
            val_label_list.extend(val_label)

        total_train_label = np.asarray(train_label_list, dtype=np.uint8)
        total_train_pred = np.asarray(train_pred_list, dtype=np.float32)
        train_metric = EstimateMetirc(total_train_pred, total_train_label, 'train')

        total_label = np.asarray(val_label_list, dtype=np.uint8)
        total_pred = np.asarray(val_pred_list, dtype=np.float32)
        val_metric = EstimateMetirc(total_pred, total_label, 'val')

        # Final model: refit on every available case before testing / saving.
        self.__classifier.SetDataContainer(data_container)
        self.__classifier.Fit()

        # Decide once whether a test evaluation is possible.
        has_test = (test_data_container is not None
                    and test_data_container.GetArray().size > 0)

        test_metric = {}
        if has_test:
            test_data = test_data_container.GetArray()
            test_label = test_data_container.GetLabel()
            test_pred = self.__classifier.Predict(test_data)

            test_metric = EstimateMetirc(test_pred, test_label, 'test')

        if store_folder:
            # makedirs also creates missing parent folders and tolerates an
            # already existing target.
            os.makedirs(store_folder, exist_ok=True)

            info = {}
            info.update(train_metric)
            info.update(val_metric)

            np.save(os.path.join(store_folder, 'train_predict.npy'), total_train_pred)
            np.save(os.path.join(store_folder, 'val_predict.npy'), total_pred)
            np.save(os.path.join(store_folder, 'train_label.npy'), total_train_label)
            np.save(os.path.join(store_folder, 'val_label.npy'), total_label)

            cv_info_path = os.path.join(store_folder, 'cv_info.csv')
            df = pd.DataFrame(data=val_index_store)
            df.to_csv(cv_info_path)

            DrawROCList(total_train_pred, total_train_label, store_path=os.path.join(store_folder, 'train_ROC.jpg'), is_show=False)
            DrawROCList(total_pred, total_label, store_path=os.path.join(store_folder, 'val_ROC.jpg'), is_show=False)

            if has_test:
                info.update(test_metric)
                np.save(os.path.join(store_folder, 'test_predict.npy'), test_pred)
                np.save(os.path.join(store_folder, 'test_label.npy'), test_label)
                DrawROCList(test_pred, test_label, store_path=os.path.join(store_folder, 'test_ROC.jpg'),
                            is_show=False)

            self.__classifier.Save(store_folder)
            self.SaveResult(info, store_folder)

        return train_metric, val_metric, test_metric
Beispiel #4
0
 def __init__(self):
     """Set up two default classifiers, a parameter manager and a parameter list.

     Two separate ``Classifier`` instances are created: one stored as
     ``_raw_classifier`` and one as the private working classifier. The
     classifier parameter list starts with a single empty dict.
     """
     # Classifiers: two independent default instances.
     self._raw_classifier = Classifier()
     self.__classifier = Classifier()

     # Hyper-parameter bookkeeping.
     self._hyper_parameter_manager = HyperParameterManager()
     self.__classifier_parameter_list = [dict()]
Beispiel #5
0
 def __init__(self):
     """Initialise the instance with a default-constructed ``Classifier``."""
     default_classifier = Classifier()
     self._classifier = default_classifier