def fit(self, data, label):
    """Fit the initial coding matrix, then extend it column by column.

    The samples are split 75/25 into a training part and a validation part
    (``train_test_split(..., test_size=0.25)``).  ``create_matrix`` builds
    the starting matrix, label index, estimators and weights from that
    split.  Afterwards, for each of ``self.iter_num`` rounds:

    1. the validation part is predicted with the current model and a
       confusion matrix is built from the result;
    2. the largest cell of that matrix selects a pair of labels;
    3. every pair of disjoint label subsets that places the two labels on
       opposite sides yields a candidate column; candidates for which
       ``MT.have_same_col`` is true are skipped, the others are trained on
       the training part and scored on the validation part;
    4. the best-scoring candidate (the last one on ties) is appended to
       the matrix together with its estimator and weight, and the two
       subsets joined with ``+`` are added to the pool of subsets.

    If the selected cell yields no candidate, the cell is zeroed and the
    next largest cell is tried until the confusion matrix sums to zero.
    A round that finds no candidate leaves the model unchanged.

    Args:
        data: Samples, forwarded to ``train_test_split``.
        label: Labels matching ``data``.

    Returns:
        None.  The method stores ``train_data``, ``validate_data``,
        ``train_label``, ``validation_y``, ``matrix``, ``index``,
        ``predictors`` and ``predictor_weights`` on ``self``.
    """
    (self.train_data, self.validate_data,
     self.train_label, self.validation_y) = train_test_split(
        data, label, test_size=0.25)
    (self.matrix, self.index,
     self.predictors, self.predictor_weights) = self.create_matrix(
        self.train_data, self.train_label,
        self.validate_data, self.validation_y,
        self.estimator, **self.param)
    # NOTE(review): presumably a list of label lists, one per side of each
    # matrix column -- confirm against MT.get_subset_feature_from_matrix.
    feature_subset = MT.get_subset_feature_from_matrix(
        self.matrix, self.index)

    for _ in range(self.iter_num):
        y_pred = self.predict(self.validate_data)
        confusion_matrix = MT.create_confusion_matrix(
            self.validation_y, y_pred, self.index)
        n_cols = confusion_matrix.shape[1]

        while True:
            # np.argmax returns a position in the flattened matrix; split
            # it into integer row/column coordinates.
            row, column = divmod(int(np.argmax(confusion_matrix)), n_cols)
            label_y = MT.get_key(self.index, row)
            label_x = MT.get_key(self.index, column)

            score_result = 0
            col_result = None
            est_result = None
            est_weight_result = None
            feature_subset_m = None
            feature_subset_n = None

            # combinations() yields (m, n) with m < n in the same order as
            # two nested index loops, so ties are broken identically.
            for m, n in combinations(range(len(feature_subset)), 2):
                subset_m = feature_subset[m]
                subset_n = feature_subset[n]
                separates = (
                    (label_y in subset_m and label_x in subset_n)
                    or (label_y in subset_n and label_x in subset_m))
                if not separates or not set(subset_m).isdisjoint(subset_n):
                    continue

                col = MT.create_col_from_partition(
                    subset_m, subset_n, self.index)
                if MT.have_same_col(col, self.matrix):
                    continue

                train_data, train_cla = MT.get_data_from_col(
                    self.train_data, self.train_label, col, self.index)
                est = self.estimator(**self.param).fit(train_data, train_cla)
                validation_data, validation_cla = MT.get_data_from_col(
                    self.validate_data, self.validation_y, col, self.index)
                if validation_data is None:
                    # Nothing to validate on: use the fixed default score.
                    score = 0.8
                else:
                    score = est.score(validation_data, validation_cla)

                # ">=" keeps the most recently evaluated candidate on ties.
                if score >= score_result:
                    score_result = score
                    col_result = col
                    est_result = est
                    est_weight_result = MT.estimate_weight(1 - score_result)
                    feature_subset_m = m
                    feature_subset_n = n

            if col_result is not None:
                break
            # No usable column for this label pair: discard the cell and
            # move on to the next largest one, stopping once none is left.
            confusion_matrix[row, column] = 0
            if np.sum(confusion_matrix) == 0:
                break

        if col_result is None:
            # Nothing to add in this round; the model stays as it is.
            continue

        self.matrix = np.hstack((self.matrix, col_result))
        self.predictors.append(est_result)
        self.predictor_weights.append(est_weight_result)
        # NOTE(review): relies on "+" concatenating the two subsets
        # (true for lists) -- confirm the element type of feature_subset.
        feature_subset.append(
            feature_subset[feature_subset_m] + feature_subset[feature_subset_n])
def create_matrix(self, train_data, train_label, validate_data,
                  validate_label, estimator, **param):
    """Build the initial coding matrix by repeatedly splitting label sets.

    Starting from the set of all distinct training labels, label sets are
    processed first-in, first-out.  For each set, every way of choosing
    ``ceil(k / 2)`` of its ``k`` labels as the positive side is tried: an
    estimator is trained on the training samples of the two sides (targets
    ``+1`` / ``-1``) and scored on the matching validation samples.  The
    best-scoring split (the last one on ties) becomes a matrix column with
    ``1`` for positive labels, ``-1`` for negative labels and ``0`` for all
    other labels.  Each side that still holds more than one label is queued
    for further splitting.

    Args:
        train_data: Training samples.
        train_label: Labels of ``train_data``.
        validate_data: Validation samples used to score each split.
        validate_label: Labels of ``validate_data``.
        estimator: Callable invoked as ``estimator(**param)``; the result
            must provide ``fit(X, y)`` returning the fitted estimator, which
            in turn must provide ``score(X, y)``.
        **param: Keyword arguments forwarded to ``estimator``.

    Returns:
        tuple: ``(matrix, index, predictors, predictor_weights)`` where
        ``matrix`` has one row per label and one column per split,
        ``index`` maps each label to its row, ``predictors`` holds the
        fitted estimator of each column and ``predictor_weights`` holds
        ``MT.estimate_weight(1 - score)`` for each column.

    Raises:
        ValueError: If no split of a label set reaches a validation score
            of at least zero, so no column can be chosen for it.
    """
    unique_labels = np.unique(train_label)
    index = {lab: row for row, lab in enumerate(unique_labels)}
    matrix = None
    predictors = []
    predictor_weights = []
    labels_to_divide = [unique_labels]

    while labels_to_divide:
        label_set = labels_to_divide.pop(0)
        label_count = len(label_set)
        # Builtin int(): the np.int alias no longer exists in current NumPy.
        group_size = int(np.ceil(label_count / 2))

        score_result = 0
        est_result = None
        # Reset per label set so a split chosen for an earlier set can never
        # be reused by accident.
        class_1_variety_result = None
        class_2_variety_result = None

        for group in combinations(range(label_count), group_size):
            chosen = set(group)
            class_1_variety = np.array([label_set[i] for i in group])
            # Labels within a set are distinct, so the complement by
            # position equals the complement by value.
            class_2_variety = np.array(
                [label_set[i] for i in range(label_count)
                 if i not in chosen])

            # Binary training problem: side 1 -> +1, side 2 -> -1.
            class_1_data, _ = MT.get_data_subset(
                train_data, train_label, class_1_variety)
            class_2_data, _ = MT.get_data_subset(
                train_data, train_label, class_2_variety)
            train_d = np.vstack((class_1_data, class_2_data))
            train_c = np.hstack((np.ones(len(class_1_data)),
                                 -np.ones(len(class_2_data))))
            est = estimator(**param).fit(train_d, train_c)

            # Same construction on the validation part.
            val_1_data, _ = MT.get_data_subset(
                validate_data, validate_label, class_1_variety)
            val_2_data, _ = MT.get_data_subset(
                validate_data, validate_label, class_2_variety)
            val_1_cla = np.ones(len(val_1_data))
            val_2_cla = -np.ones(len(val_2_data))

            validation_d = np.array([])
            validation_c = np.array([])
            try:
                validation_d = np.vstack((val_1_data, val_2_data))
                validation_c = np.hstack((val_1_cla, val_2_cla))
            except (TypeError, ValueError):
                # Stacking failed (e.g. one side has no validation
                # samples): score on whichever side has data.
                if len(val_1_data) > 0:
                    validation_d = val_1_data
                    validation_c = val_1_cla
                elif len(val_2_data) > 0:
                    validation_d = val_2_data
                    validation_c = val_2_cla

            if validation_d.shape[0] > 0 and validation_d.shape[1] > 0:
                score = est.score(validation_d, validation_c)
            else:
                # Nothing to validate on: use the fixed default score.
                score = 0.8

            # ">=" keeps the most recently evaluated split on ties.
            if score >= score_result:
                score_result = score
                est_result = est
                class_1_variety_result = class_1_variety
                class_2_variety_result = class_2_variety

        if class_1_variety_result is None:
            raise ValueError(
                "no label partition reached a non-negative validation score")

        new_col = np.zeros((len(index), 1))
        for lab in class_1_variety_result:
            new_col[index[lab]] = 1
        for lab in class_2_variety_result:
            new_col[index[lab]] = -1

        if matrix is None:
            matrix = copy.copy(new_col)
        else:
            matrix = np.hstack((matrix, new_col))
        predictors.append(est_result)
        predictor_weights.append(MT.estimate_weight(1 - score_result))

        # Only sides with several labels can be split further.
        if len(class_1_variety_result) > 1:
            labels_to_divide.append(class_1_variety_result)
        if len(class_2_variety_result) > 1:
            labels_to_divide.append(class_2_variety_result)

    return matrix, index, predictors, predictor_weights