Ejemplo n.º 1
0
 def LoadPipelineInfo(self, store_folder):
     """Restore the pipeline candidates from ``pipeline_info.csv``.

     The file is read from ``store_folder``. The first cell of every row
     names the pipeline stage and the remaining cells are the names that
     are handed to ``Index2Dict.GetInstantByIndex``:

     * ``Normalizer``, ``DimensionReduction``, ``FeatureSelector`` and
       ``Classifier`` rows are turned into lists of looked-up objects.
     * ``FeatureNumber`` is stored as the raw list of CSV strings (no
       numeric conversion is done here).
     * ``CrossValidation`` uses only the second cell of the row.

     Rows with any other name print ``'Unknown name.'`` and are ignored.
     Blank lines in the file are skipped.

     Args:
         store_folder: Directory that contains ``pipeline_info.csv``.
     """
     index_2_dict = Index2Dict()

     def lookup_all(names):
         # One lookup per CSV cell, in file order.
         return [index_2_dict.GetInstantByIndex(name) for name in names]

     info_path = os.path.join(store_folder, 'pipeline_info.csv')
     with open(info_path, 'r', newline='') as csvfile:
         for row in csv.reader(csvfile):
             if not row:
                 # csv.reader yields [] for a blank line; row[0] would
                 # raise IndexError, so skip it.
                 continue

             name, values = row[0], row[1:]
             # The attributes are assigned literally (not via setattr) so
             # that private-name mangling of the double-underscore
             # attributes keeps working inside the class.
             if name == 'Normalizer':
                 self.__normalizer_list = lookup_all(values)
             elif name == 'DimensionReduction':
                 self._dimension_reduction_list = lookup_all(values)
             elif name == 'FeatureSelector':
                 self.__feature_selector_list = lookup_all(values)
             elif name == 'FeatureNumber':
                 self.__feature_selector_num_list = values
             elif name == 'Classifier':
                 self.__classifier_list = lookup_all(values)
             elif name == 'CrossValidation':
                 self.__cross_validation = index_2_dict.GetInstantByIndex(
                     row[1])
             else:
                 print('Unknown name.')
Ejemplo n.º 2
0
    def LoadPipelineInfo(self, store_folder):
        """Load the pipeline configuration stored in ``pipeline_info.csv``.

        The first cell of each row is compared against the module-level
        row-name constants; the remaining cells are the names passed to
        ``Index2Dict.GetInstantByIndex``. ``FEATURE_NUMBER`` cells are kept
        as the raw CSV strings. Blank lines in the file are skipped.

        Args:
            store_folder: Directory expected to contain ``pipeline_info.csv``.

        Returns:
            ``False`` when the file does not exist or when the version row
            holds a value that is not in ``ACCEPT_VERSION`` (attributes set
            by rows read before that point stay set); ``True`` once every
            row has been processed.

        Raises:
            KeyError: A row starts with a name that is not recognised. The
                offending name is attached to the exception.
        """
        index_2_dict = Index2Dict()
        info_path = os.path.join(store_folder, 'pipeline_info.csv')
        if not os.path.exists(info_path):
            return False

        def lookup_all(names):
            # One lookup per CSV cell, in file order.
            return [index_2_dict.GetInstantByIndex(name) for name in names]

        with open(info_path, 'r', newline='') as csvfile:
            for row in csv.reader(csvfile):
                if not row:
                    # csv.reader yields [] for a blank line; indexing it
                    # would raise IndexError instead of the documented
                    # KeyError, so skip it.
                    continue

                name = row[0]
                if name == VERSION_NAME:
                    self.version = row[1]
                    if self.version not in ACCEPT_VERSION:
                        return False
                elif name == CROSS_VALIDATION:
                    self.cv = index_2_dict.GetInstantByIndex(row[1])
                elif name == BALANCE:
                    self.balance = index_2_dict.GetInstantByIndex(row[1])
                elif name == NORMALIER:
                    self.normalizer_list = lookup_all(row[1:])
                elif name == DIMENSION_REDUCTION:
                    self.dimension_reduction_list = lookup_all(row[1:])
                elif name == FEATURE_SELECTOR:
                    self.feature_selector_list = lookup_all(row[1:])
                elif name == FEATURE_NUMBER:
                    self.feature_selector_num_list = row[1:]
                elif name == CLASSIFIER:
                    self.classifier_list = lookup_all(row[1:])
                else:
                    print('Unknown name: {}'.format(name))
                    # Carry the name in the exception so callers that
                    # catch it can report what was wrong.
                    raise KeyError(name)
        return True
Ejemplo n.º 3
0
    def _MethodDescription(self, pipeline_name, result_root):
        """Build the text paragraph describing one pipeline.

        The normalizer, dimension reduction, feature selector and classifier
        are looked up from the last path component of the folders returned by
        ``self.__manager.SplitFolder`` (for the feature selector only the
        part before the first ``'_'`` is used). The cross validation and
        balance objects are looked up from the ``CROSS_VALIDATION`` and
        ``BALANCE`` rows of ``pipeline_info.csv`` inside ``result_root``.

        Args:
            pipeline_name: Pipeline identifier forwarded to ``SplitFolder``.
            result_root: Result directory; also holds ``pipeline_info.csv``.

        Returns:
            A string that starts with four spaces, followed by the
            ``GetDescription()`` text of balance, normalizer, dimension
            reduction, feature selector, classifier and cross validation
            (in that order), and ends with a newline. If the CSV file has no
            balance or cross validation row, or no object was obtained for
            it, that part is left out.
        """
        index_dict = Index2Dict()
        norm_folder, dr_folder, fs_folder, cls_folder = self.__manager.SplitFolder(
            pipeline_name, result_root)

        normalizer = index_dict.GetInstantByIndex(os.path.basename(norm_folder))
        dr = index_dict.GetInstantByIndex(os.path.basename(dr_folder))
        fs = index_dict.GetInstantByIndex(
            os.path.basename(fs_folder).split('_')[0])
        cls = index_dict.GetInstantByIndex(os.path.basename(cls_folder))

        # Pre-bind both names: a file without these rows previously ended in
        # an UnboundLocalError when the description was assembled.
        cv = None
        balance = None
        info_path = os.path.join(result_root, 'pipeline_info.csv')
        with open(info_path, 'r', newline='') as file:
            for row in csv.reader(file):
                if not row:
                    # Blank line -> empty list; nothing to inspect.
                    continue
                if CROSS_VALIDATION == row[0]:
                    cv = index_dict.GetInstantByIndex(row[1])
                elif BALANCE == row[0]:
                    balance = index_dict.GetInstantByIndex(row[1])

        parts = ["    "]
        if balance is not None:
            parts.append(balance.GetDescription())
        parts.extend(stage.GetDescription() for stage in (normalizer, dr, fs, cls))
        if cv is not None:
            parts.append(cv.GetDescription())
        parts.append("\n")

        return ''.join(parts)
Ejemplo n.º 4
0
 def LoadPipeline(self, store_path):
     """Restore a single pipeline from the CSV file at ``store_path``.

     The first cell of each row names the stage and the second cell is the
     name passed to ``Index2Dict.GetInstantByIndex``. The ``FeatureNumber``
     row is converted with ``int`` and, after the whole file has been read,
     applied to the feature selector through ``SetSelectedFeatureNumber``
     (0 is used when the file has no such row). Rows with other names and
     blank lines are ignored.

     Args:
         store_path: Path of the CSV file to read.

     Raises:
         ValueError: The ``FeatureNumber`` cell is not an integer literal.
     """
     index_2_dict = Index2Dict()
     feature_number = 0
     with open(store_path, 'r', newline='') as csvfile:
         for row in csv.reader(csvfile):
             if not row:
                 # csv.reader yields [] for a blank line; row[0] would
                 # raise IndexError, so skip it.
                 continue

             name = row[0]
             # The names are mutually exclusive, so one elif chain replaces
             # the six independent tests.
             if name == 'Normalizer':
                 self.__normalizer = index_2_dict.GetInstantByIndex(row[1])
             elif name == 'DimensionReduction':
                 self.__dimension_reduction = index_2_dict.GetInstantByIndex(row[1])
             elif name == 'FeatureSelector':
                 self.__feature_selector = index_2_dict.GetInstantByIndex(row[1])
             elif name == 'FeatureNumber':
                 feature_number = int(row[1])
             elif name == 'Classifier':
                 self.__classifier = index_2_dict.GetInstantByIndex(row[1])
             elif name == 'CrossValidation':
                 self.__cv = index_2_dict.GetInstantByIndex(row[1])
     # Applied last because the FeatureNumber row may precede the
     # FeatureSelector row in the file.
     self.__feature_selector.SetSelectedFeatureNumber(feature_number)
Ejemplo n.º 5
0
                                                               key_word=CV_VAL)
                            self.__auc_dict[CV_VAL][norm_index, dr_index, fs_index, fn_index, cls_index] = \
                                cv_val_metric['{}_{}'.format(CV_VAL, AUC)]
                            self.__auc_std_dict[CV_VAL][norm_index, dr_index, fs_index, fn_index, cls_index] = \
                                cv_val_metric['{}_{}'.format(CV_VAL, AUC_STD)]
                            self._AddOneMetric(cv_val_metric, os.path.join(cls_store_folder, 'metrics.csv'))
                            self._MergeOneMetric(cv_val_metric, CV_VAL, model_name)

        self.total_metric[CV_TRAIN].to_csv(os.path.join(store_folder, '{}_results.csv'.format(CV_TRAIN)))
        self.total_metric[CV_VAL].to_csv(os.path.join(store_folder, '{}_results.csv'.format(CV_VAL)))


if __name__ == '__main__':
    # Ad-hoc demo run: loads two feature tables from hard-coded local paths
    # and builds a pipeline manager with one candidate per stage.
    # NOTE(review): `manager` is not used in the visible part of this script.
    manager = PipelinesManager()

    # Used below to turn component names (e.g. 'SVM') into objects via
    # GetInstantByIndex.
    index_dict = Index2Dict()

    train = DataContainer()
    test = DataContainer()
    # Absolute Windows paths on one user's desktop; adjust before running
    # on another machine.
    train.Load(r'C:\Users\yangs\Desktop\train_numeric_feature.csv')
    test.Load(r'C:\Users\yangs\Desktop\test_numeric_feature.csv')

    # np.arange(1, 18) gives the feature counts 1..17 (the stop value is
    # exclusive).
    faps = PipelinesManager(balancer=index_dict.GetInstantByIndex('UpSampling'),
                            normalizer_list=[index_dict.GetInstantByIndex('Mean')],
                            dimension_reduction_list=[index_dict.GetInstantByIndex('PCC')],
                            feature_selector_list=[index_dict.GetInstantByIndex('ANOVA')],
                            feature_selector_num_list=list(np.arange(1, 18)),
                            classifier_list=[index_dict.GetInstantByIndex('SVM')],
                            cross_validation=index_dict.GetInstantByIndex('5-Fold'))

    # for total, num in faps.RunWithoutCV(train, store_folder=r'..\..\Demo\db2-1'):