Example #1
0
class FeatureSelectByMrmr(FeatureSelectByAnalysis):
    '''
    Feature selector that ranks features with pymrmr's mRMR
    (minimum-Redundancy-Maximum-Relevance) implementation and keeps the
    requested number of them.
    '''
    def __init__(self, selected_feature_number=1):
        super(FeatureSelectByMrmr, self).__init__(selected_feature_number)
        # Used by LoadFeatureSelectorParameterList to read this selector's
        # configuration.
        self._hyper_parameter_manager = HyperParameterManager()

    def GetDescription(self):
        '''Return the free-text description of the mRMR selection step.'''
        # The second literal ends with a space so the concatenated sentence
        # reads "variable, subject" instead of "variable,subject".
        text = "Before build the model, we used minimum-Redundancy-Maximum-Relevance (mRMR) to select features. The goal of mRMR " \
               "is to select a feature subset set that best characterizes the statistical property of a target classification variable, " \
               "subject to the constraint that these features are mutually as dissimilar to each other as possible, but marginally as similar to the classification variable as possible."
        return text

    def GetSelectedFeatureIndex(self, data_container):
        '''
        Run mRMR on the container's data and report the chosen features.

        :param data_container: object providing GetArray(), GetLabel() and
            GetFeatureName(); the array is indexed as (rows, features).
        :return: tuple (feature_index, rank, feature_name) where
            feature_name is the list returned by pymrmr.mRMR,
            feature_index holds the position of each of those names in the
            container's feature-name list, and rank is 0..len-1 in the
            order pymrmr returned them.
        '''
        data = data_container.GetArray()
        column_norms = np.linalg.norm(data, ord=2, axis=0)
        # An all-zero column has zero norm; divide it by 1 so it stays zero
        # instead of turning into NaN.
        column_norms = np.where(column_norms == 0, 1, column_norms)
        # Out-of-place division: works for integer arrays as well and never
        # writes into the array object handed out by the container.
        data = data / column_norms
        label = data_container.GetLabel()

        if data.shape[1] < self.GetSelectedFeatureNumber():
            print(
                'mMRM: The number of features {:d} in data container is smaller than the required number {:d}'
                .format(data.shape[1], self.GetSelectedFeatureNumber()))
            # Cannot select more features than exist; clamp the request.
            self.SetSelectedFeatureNumber(data.shape[1])

        feature_names = list(data_container.GetFeatureName())

        # Build the frame handed to pymrmr: the label goes first under the
        # column name 'class', followed by one column per feature.
        mRMR_input = pd.concat([pd.DataFrame(label), pd.DataFrame(data)],
                               axis=1)
        mRMR_input.columns = ['class'] + feature_names

        # Rely on the method's default relative path, which is assembled
        # with os.path.join and therefore valid on every platform (the
        # previous hard-coded backslash path only resolved on Windows).
        parameter_list = self.LoadFeatureSelectorParameterList()
        feature_name = pymrmr.mRMR(mRMR_input,
                                   parameter_list[0]['mutual_information'],
                                   self.GetSelectedFeatureNumber())

        feature_index = [feature_names.index(name) for name in feature_name]
        rank = list(range(len(feature_name)))
        return feature_index, rank, feature_name

    def GetName(self):
        '''Return the selector name, also used as the configuration key.'''
        return 'mRMR'

    def LoadFeatureSelectorParameterList(self,
                                         relative_path=os.path.join(
                                             'HyperParameters',
                                             'FeatureSelector')):
        '''
        Load this selector's configuration through the hyper-parameter
        manager and return its parameter setting.

        :param relative_path: location passed on to
            HyperParameterManager.LoadSpecificConfig.
        :return: the value of HyperParameterManager.GetParameterSetting();
            GetSelectedFeatureIndex reads
            parameter_list[0]['mutual_information'] from it.
        '''
        self._hyper_parameter_manager.LoadSpecificConfig(
            self.GetName(), relative_path=relative_path)
        return self._hyper_parameter_manager.GetParameterSetting()

    def Run(self, data_container, store_folder=''):
        '''
        Select features and optionally store the result.

        :param data_container: input container (left unreplaced, see
            is_replace=False below).
        :param store_folder: when it names an existing directory, the
            selected features, the selection info and the mRMR ranking are
            written there as CSV files.
        :return: the container produced by SelectFeatureByIndex.
        '''
        selected_index, rank, feature_name = self.GetSelectedFeatureIndex(
            data_container)
        new_data_container = self.SelectFeatureByIndex(data_container,
                                                       selected_index,
                                                       is_replace=False)

        if store_folder and os.path.isdir(store_folder):
            new_data_container.Save(
                os.path.join(store_folder, 'selected_feature.csv'))
            SaveSelectInfo(new_data_container,
                           os.path.join(store_folder,
                                        'feature_select_info.csv'),
                           is_merge=False)

            # One row per selected feature, indexed by name, holding its
            # position in the mRMR ordering.
            ranking = pd.DataFrame(data=rank,
                                   index=feature_name,
                                   columns=['rank'])
            ranking.to_csv(os.path.join(store_folder, 'mMRM_sort.csv'))

        return new_data_container
Example #2
0
class CrossValidation:
    '''
    CrossValidation is the base class to explore the hyper-parameters. It
    supports leave-one-out (LOO), 10-fold and 5-fold cross validation. A
    classifier must be set before running CV. A training metric and a
    validation metric will be returned. If a testing data container was also
    set, the test metric will be returned as well.
    '''
    def __init__(self):
        # Untouched snapshot of the classifier, used by SetDefaultClassifier.
        self._raw_classifier = Classifier()
        # Working classifier exposed through the `classifier` property.
        self.__classifier = Classifier()
        self._hyper_parameter_manager = HyperParameterManager()
        # Parameter sets to try; a single empty dict by default.
        self.__classifier_parameter_list = [{}]

    def SetDefaultClassifier(self):
        '''Replace the working classifier with a fresh copy of the snapshot.'''
        self.__classifier = deepcopy(self._raw_classifier)

    def SetClassifier(self, classifier):
        '''
        Install `classifier` as the working classifier.

        Re-runs __init__ first, so the hyper-parameter manager and the
        parameter list are reset as well. A deep copy is kept as the
        snapshot while the passed object itself becomes the working
        classifier.
        '''
        self.__init__()
        self._raw_classifier = deepcopy(classifier)
        self.__classifier = classifier

    def GetClassifier(self):
        '''Return the working classifier.'''
        return self.__classifier

    classifier = property(GetClassifier, SetClassifier)

    def AutoLoadClassifierParameterList(self,
                                        relative_path=os.path.join(
                                            'HyperParameters', 'Classifier')):
        '''
        Load the parameter list for the current classifier (looked up by
        its GetName()) through the hyper-parameter manager.

        :param relative_path: location passed on to
            HyperParameterManager.LoadSpecificConfig.
        '''
        self._hyper_parameter_manager.LoadSpecificConfig(
            self.classifier.GetName(), relative_path=relative_path)
        self.__classifier_parameter_list = (
            self._hyper_parameter_manager.GetParameterSetting())

    def SetClassifierParameterList(self, classifier_parameter_list):
        '''Store a deep copy of the given parameter list.'''
        self.__classifier_parameter_list = deepcopy(classifier_parameter_list)

    def GetClassifierParameterList(self):
        '''Return the stored parameter list (not a copy).'''
        return self.__classifier_parameter_list

    classifier_parameter_list = property(GetClassifierParameterList,
                                         SetClassifierParameterList)

    def _GetNameOfParamDict(self, param_dict):
        '''
        Build a name of the form 'key_value-key_value-...' from a parameter
        dict, in the dict's iteration order. An empty dict gives ''.
        '''
        return '-'.join(
            str(key) + '_' + str(value) for key, value in param_dict.items())

    def SaveResult(self, info, store_path):
        '''
        Write `info` to a CSV file, one row per key, sorted by key.

        Each row starts with the key. A number or string value follows as a
        single cell; any other iterable value is spread over one cell per
        element; a value that is neither (e.g. None) is written as a single
        cell instead of raising TypeError.

        :param info: dict mapping mutually sortable keys to values.
        :param store_path: target file, or an existing directory in which
            'result.csv' is created.
        '''
        rows = []
        # Keys are unique, so sorting them once fixes the row order.
        for key in sorted(info):
            value = info[key]
            row = [key]
            if isinstance(value, (numbers.Number, str)):
                row.append(value)
            else:
                try:
                    elements = iter(value)
                except TypeError:
                    # Not iterable: keep it as one cell.
                    row.append(value)
                else:
                    row.extend(elements)
            rows.append(row)

        if os.path.isdir(store_path):
            store_path = os.path.join(store_path, 'result.csv')

        # newline='' lets the csv module control line endings itself.
        with open(store_path, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile,
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
            writer.writerows(rows)