Ejemplo n.º 1
0
 def fit(self, data, label):
     """Train the initial coding matrix, then greedily append columns.

     The samples are split 75/25 into a training and a validation part
     (``train_test_split`` with ``test_size=0.25``) and ``create_matrix``
     builds the starting matrix, label index, estimators and weights.
     Then, ``self.iter_num`` times:

     1. the validation part is predicted and a confusion matrix is built;
     2. the largest cell of that matrix is taken, and every pair of
        disjoint label subsets that puts the cell's two labels on opposite
        sides is turned into a candidate column (columns already present
        in the matrix are skipped);
     3. one estimator is trained per candidate and the candidate with the
        best validation score is kept (ties go to the one found last);
     4. if the cell yields no candidate it is zeroed and the next largest
        cell is tried, until the confusion matrix sums to zero.

     A selected column is appended to ``self.matrix`` together with its
     estimator and weight, and the union of the two subsets joins the
     candidate pool for later iterations.

     Sets ``train_data``, ``validate_data``, ``train_label``,
     ``validation_y``, ``matrix``, ``index``, ``predictors`` and
     ``predictor_weights`` on ``self``.

     Args:
         data: Samples; handed to ``train_test_split``.
         label: Labels matching ``data``.
     """
     (self.train_data, self.validate_data,
      self.train_label, self.validation_y) = train_test_split(
          data, label, test_size=0.25)
     (self.matrix, self.index,
      self.predictors, self.predictor_weights) = self.create_matrix(
          self.train_data, self.train_label, self.validate_data,
          self.validation_y, self.estimator, **self.param)
     feature_subset = MT.get_subset_feature_from_matrix(
         self.matrix, self.index)
     for _ in range(self.iter_num):
         y_pred = self.predict(self.validate_data)
         confusion_matrix = MT.create_confusion_matrix(
             self.validation_y, y_pred, self.index)
         n_cols = confusion_matrix.shape[1]
         subset_count = len(feature_subset)
         while True:
             # Integer row/column of the largest cell.  The previous
             # np.floor(a / b) produced a float row index.
             max_index_y, max_index_x = divmod(
                 int(np.argmax(confusion_matrix)), n_cols)
             label_y = MT.get_key(self.index, max_index_y)
             label_x = MT.get_key(self.index, max_index_x)
             score_result = 0
             col_result = None
             est_result = None
             est_weight_result = None
             feature_subset_m = None
             feature_subset_n = None
             for m in range(subset_count - 1):
                 subset_m = feature_subset[m]
                 for n in range(m + 1, subset_count):
                     subset_n = feature_subset[n]
                     separates = (
                         (label_y in subset_m and label_x in subset_n)
                         or (label_y in subset_n and label_x in subset_m))
                     if not separates \
                             or not set(subset_m).isdisjoint(subset_n):
                         continue
                     col = MT.create_col_from_partition(
                         subset_m, subset_n, self.index)
                     if MT.have_same_col(col, self.matrix):
                         continue
                     train_data, train_cla = MT.get_data_from_col(
                         self.train_data, self.train_label, col,
                         self.index)
                     est = self.estimator(**self.param).fit(
                         train_data, train_cla)
                     validation_data, validation_cla = MT.get_data_from_col(
                         self.validate_data, self.validation_y, col,
                         self.index)
                     if validation_data is None:
                         # No validation samples for this column: fall
                         # back to a fixed score of 0.8.
                         score = 0.8
                     else:
                         score = est.score(validation_data, validation_cla)
                     # ">=" means a later candidate wins a tie.
                     if score >= score_result:
                         score_result = score
                         col_result = col
                         est_result = est
                         est_weight_result = MT.estimate_weight(
                             1 - score_result)
                         feature_subset_m = m
                         feature_subset_n = n
             if col_result is not None:
                 break
             # This cell cannot be separated by any new column: drop it
             # and move on to the next largest one.  Plain int indices
             # replace np.int, which was removed in NumPy 1.24.
             confusion_matrix[max_index_y, max_index_x] = 0
             if np.sum(confusion_matrix) == 0:
                 break
         if col_result is None:
             # No candidate at all this round.  This was previously
             # absorbed by a blanket except (TypeError, ValueError): pass
             # around np.hstack, which also hid unrelated errors.
             continue
         self.matrix = np.hstack((self.matrix, col_result))
         self.predictors.append(est_result)
         self.predictor_weights.append(est_weight_result)
         # NOTE(review): "+" concatenates only if the subsets are lists;
         # confirm the return type of MT.get_subset_feature_from_matrix.
         feature_subset.append(feature_subset[feature_subset_m] +
                               feature_subset[feature_subset_n])
Ejemplo n.º 2
0
 def create_matrix(self, train_data, train_label, validate_data,
                   validate_label, estimator, **param):
     """Build a coding matrix by repeatedly bisecting the label set.

     Starting from all distinct training labels, each pending label set is
     split into two groups.  Every choice of ``ceil(len / 2)`` labels for
     the first group is tried: an estimator is trained to tell the first
     group (target ``+1``) from the rest (target ``-1``) and scored on the
     validation samples of those labels.  The best-scoring split (ties go
     to the split tried last) becomes a new matrix column, and each side
     that still holds more than one label is queued for further splitting.

     Args:
         train_data: Training samples.
         train_label: Labels of ``train_data``.
         validate_data: Validation samples.
         validate_label: Labels of ``validate_data``.
         estimator: Callable; ``estimator(**param).fit(X, y)`` must return
             an object providing ``score(X, y)``.
         **param: Keyword arguments forwarded to ``estimator``.

     Returns:
         A tuple ``(matrix, index, predictors, predictor_weights)``:
         ``matrix`` has one row per label and one column per split with
         entries ``1``, ``-1`` or ``0`` (label not involved), or is
         ``None`` if no split was made; ``index`` maps each label to its
         row; ``predictors`` holds the fitted estimator of each column;
         ``predictor_weights`` holds ``MT.estimate_weight(1 - score)``
         for each column.

     Raises:
         ValueError: If no candidate split of a label set reaches a score
             ``>= 0`` (for example when the score is NaN).
     """
     unique_labels = np.unique(train_label)
     index = {label: row for row, label in enumerate(unique_labels)}
     matrix = None
     predictors = []
     predictor_weights = []
     labels_to_divide = [unique_labels]
     while labels_to_divide:
         label_set = labels_to_divide.pop(0)
         label_count = len(label_set)
         # ceil(label_count / 2) in integer arithmetic; replaces
         # np.int(np.ceil(...)), since np.int was removed in NumPy 1.24.
         group_size = (label_count + 1) // 2
         score_result = 0
         est_result = None
         # Reset per label set so a previous winner can never leak into
         # this iteration.
         class_1_variety_result = None
         class_2_variety_result = None
         for group in combinations(range(label_count), group_size):
             class_1_variety = np.array([label_set[i] for i in group])
             class_2_variety = np.array(
                 [label for label in label_set
                  if label not in class_1_variety])

             # Binary training problem: group 1 -> +1, group 2 -> -1.
             class_1_data, _ = MT.get_data_subset(
                 train_data, train_label, class_1_variety)
             class_2_data, _ = MT.get_data_subset(
                 train_data, train_label, class_2_variety)
             train_d = np.vstack((class_1_data, class_2_data))
             train_c = np.hstack((np.ones(len(class_1_data)),
                                  -np.ones(len(class_2_data))))
             est = estimator(**param).fit(train_d, train_c)

             # Same relabelling for the validation samples.
             val_1_data, _ = MT.get_data_subset(
                 validate_data, validate_label, class_1_variety)
             val_2_data, _ = MT.get_data_subset(
                 validate_data, validate_label, class_2_variety)
             val_1_cla = np.ones(len(val_1_data))
             val_2_cla = -np.ones(len(val_2_data))
             validation_d = np.array([])
             validation_c = np.array([])
             try:
                 validation_d = np.vstack((val_1_data, val_2_data))
                 validation_c = np.hstack((val_1_cla, val_2_cla))
             except (ValueError, TypeError):
                 # Stacking failed (presumably one side has no validation
                 # samples): fall back to whichever side is non-empty.
                 if len(val_1_data) > 0:
                     validation_d = val_1_data
                     validation_c = val_1_cla
                 elif len(val_2_data) > 0:
                     validation_d = val_2_data
                     validation_c = val_2_cla
             if validation_d.shape[0] > 0 and validation_d.shape[1] > 0:
                 score = est.score(validation_d, validation_c)
             else:
                 # Nothing to validate on: use a fixed score of 0.8.
                 score = 0.8
             # ">=" means a later split wins a tie.
             if score >= score_result:
                 score_result = score
                 est_result = est
                 class_1_variety_result = class_1_variety
                 class_2_variety_result = class_2_variety
         if class_1_variety_result is None:
             raise ValueError(
                 "no candidate split reached a score >= 0 for label set "
                 "{!r}".format(label_set))

         new_col = np.zeros((len(index), 1))
         for label in class_1_variety_result:
             new_col[index[label]] = 1
         for label in class_2_variety_result:
             new_col[index[label]] = -1
         # new_col is freshly allocated on every pass, so no copy is needed.
         if matrix is None:
             matrix = new_col
         else:
             matrix = np.hstack((matrix, new_col))
         predictors.append(est_result)
         predictor_weights.append(MT.estimate_weight(1 - score_result))
         if len(class_1_variety_result) > 1:
             labels_to_divide.append(class_1_variety_result)
         if len(class_2_variety_result) > 1:
             labels_to_divide.append(class_2_variety_result)
     return matrix, index, predictors, predictor_weights