def predict(self, data):
    """
    Predict a label for every sample in ``data``.

    Each sample is run through ``self._use_predictors``; the resulting output
    vector is passed to ``MT.closet_vector`` together with the coding matrix,
    ``y_euclidean_distance`` and the predictor weights, and the value that
    comes back is translated into a label with ``MT.get_key(self.index, ...)``.

    Side effect: ``self.predicted_vector`` is replaced by a list holding one
    list per matrix column, each containing that column's predictor outputs
    for the samples of this call (in input order). Results of earlier calls
    are not kept.

    :param data: numpy array of samples; a 1-D array is treated as one sample
    :return: numpy.ndarray with one predicted label per sample
    """
    if len(self.predictors) == 0:
        logging.debug('The Model has not been fitted!')
    if len(data.shape) == 1:
        data = np.reshape(data, [1, -1])

    res = []
    # Rows are gathered locally and stacked once at the end. Testing an
    # ndarray with ``== []`` is not a reliable emptiness check, and the
    # column-wise view stored in ``self.predicted_vector`` by a previous call
    # cannot be row-stacked with new output vectors.
    outputs = []
    for sample in data:
        predict_res = self._use_predictors(sample)
        outputs.append(predict_res)
        value = MT.closet_vector(predict_res, self.matrix, y_euclidean_distance,
                                 np.array(self.predictor_weights))
        res.append(MT.get_key(self.index, value))

    column_count = self.matrix.shape[1]
    if outputs:
        # vstack always yields a 2-D array, so a single sample can be sliced
        # by column just like a batch.
        stacked = np.vstack(outputs)
        self.predicted_vector = [list(stacked[:, col]) for col in range(column_count)]
    else:
        # Nothing was predicted: keep the per-column layout, with no entries.
        self.predicted_vector = [[] for _ in range(column_count)]

    return np.array(res)
def predict(self, data):
    """
    a method used to predict label for given data

    For every sample a 3-nearest-neighbour classifier fitted on the training
    split proposes a class. The output vector from ``self._use_predictors`` is
    then set to 0 at every position where that class's row of ``self.matrix``
    is 0, appended to ``self.predicted_vector``, and passed to
    ``MT.closet_vector`` with ``self.distance_measure``; the value that comes
    back is mapped to a label with ``MT.get_key(self.index, ...)``.

    :param data: data to predict (numpy array; a 1-D array is one sample)
    :return: predicted label (numpy.ndarray, one entry per sample)
    """
    res = []
    if len(self.predictors) == 0:
        logging.debug('The Model has not been fitted!')
    if len(data.shape) == 1:
        data = np.reshape(data, [1, -1])
    if len(data) == 0:
        # Nothing to predict, so do not build the neighbour model at all.
        return np.array(res)

    # The neighbour model and the label -> row lookup depend only on the
    # training split, so they are built once per call, not once per sample.
    knn_model = neighbors.KNeighborsClassifier(
        algorithm='ball_tree', n_neighbors=3).fit(self.train_data, self.train_label)
    # NOTE(review): rows are looked up by position in the sorted unique
    # training labels, not through self.index - confirm both orderings agree.
    label_to_row = {lab: pos for pos, lab in enumerate(np.unique(self.train_label))}
    knn_pre_labels = knn_model.predict(data)

    code_length = len(self.matrix[0])
    for sample, knn_label in zip(data, knn_pre_labels):
        predicted_vector = self._use_predictors(sample)  # one row
        knn_code = self.matrix[label_to_row[knn_label]]
        # zero the outputs wherever the KNN-predicted class has code 0
        for j in range(code_length):
            if knn_code[j] == 0:
                predicted_vector[j] = 0
        self.predicted_vector.append(list(predicted_vector))
        value = MT.closet_vector(predicted_vector, self.matrix, self.distance_measure)
        res.append(MT.get_key(self.index, value))
    return np.array(res)
def fit(self, data, label):
    """
    Fit the model and then try to extend its coding matrix column by column.

    The input is split 75/25 into a training and a validation part
    (``train_test_split`` with ``test_size=0.25``) and the initial matrix,
    index, predictors and predictor weights come from ``self.create_matrix``.
    Then, for ``self.iter_num`` rounds:

    1. the validation part is predicted and ``MT.create_confusion_matrix``
       is built from the result;
    2. the largest cell of that matrix selects a pair of labels
       (presumably the most frequently confused pair);
    3. every pair of disjoint label subsets that puts the two labels on
       opposite sides is turned into a candidate column, unless
       ``MT.have_same_col`` reports the column for the current matrix;
    4. an estimator is trained per candidate and scored on the validation
       part; the best score wins, ties going to the later candidate;
    5. if no candidate exists the cell is zeroed and the next largest cell is
       tried, until the confusion matrix sums to zero;
    6. the winning column, its estimator and its weight are appended, and the
       union of the two subsets becomes a new subset.

    :param data: samples used for fitting
    :param label: labels belonging to ``data``
    :return: None; the fitted state is stored on ``self``
    """
    self.train_data, self.validate_data, self.train_label, self.validation_y = train_test_split(
        data, label, test_size=0.25)
    self.matrix, self.index, self.predictors, self.predictor_weights = \
        self.create_matrix(self.train_data, self.train_label, self.validate_data,
                           self.validation_y, self.estimator, **self.param)
    feature_subset = MT.get_subset_feature_from_matrix(self.matrix, self.index)

    for _ in range(self.iter_num):
        y_pred = self.predict(self.validate_data)
        y_true = self.validation_y
        confusion_matrix = MT.create_confusion_matrix(y_true, y_pred, self.index)

        while True:
            # Turn the flat argmax position into (row, column) as plain ints,
            # so the values can be used directly as array indices.
            max_index_y, max_index_x = divmod(
                int(np.argmax(confusion_matrix)), confusion_matrix.shape[1])
            label_y = MT.get_key(self.index, max_index_y)
            label_x = MT.get_key(self.index, max_index_x)

            score_result = 0
            col_result = None
            est_result = None
            est_weight_result = None
            feature_subset_m = None
            feature_subset_n = None

            for m in range(len(feature_subset) - 1):
                for n in range(m + 1, len(feature_subset)):
                    subset_m = feature_subset[m]
                    subset_n = feature_subset[n]
                    separates_pair = (
                        (label_y in subset_m and label_x in subset_n)
                        or (label_y in subset_n and label_x in subset_m))
                    # Only disjoint subsets that split the two labels qualify.
                    if not separates_pair or set(subset_m) & set(subset_n):
                        continue

                    col = MT.create_col_from_partition(subset_m, subset_n, self.index)
                    if MT.have_same_col(col, self.matrix):
                        continue

                    train_data, train_cla = MT.get_data_from_col(
                        self.train_data, self.train_label, col, self.index)
                    est = self.estimator(**self.param).fit(train_data, train_cla)
                    validation_data, validation_cla = MT.get_data_from_col(
                        self.validate_data, self.validation_y, col, self.index)
                    if validation_data is None:
                        # No validation data for this column: fixed fallback score.
                        score = 0.8
                    else:
                        score = est.score(validation_data, validation_cla)

                    # ">=" lets a later candidate win a tie.
                    if score >= score_result:
                        score_result = score
                        col_result = col
                        est_result = est
                        est_weight_result = MT.estimate_weight(1 - score_result)
                        feature_subset_m = m
                        feature_subset_n = n

            if col_result is not None:
                break
            # No usable column for this pair: drop the cell and try the next
            # largest one, giving up once nothing is left.
            confusion_matrix[max_index_y, max_index_x] = 0
            if np.sum(confusion_matrix) == 0:
                break

        if col_result is None:
            # Every cell was exhausted without a candidate; nothing to add.
            continue
        try:
            extended_matrix = np.hstack((self.matrix, col_result))
        except (TypeError, ValueError):
            # Best effort: a column that cannot be attached is skipped.
            logging.debug('The new column could not be appended to the matrix!')
            continue
        self.matrix = extended_matrix
        self.predictors.append(est_result)
        self.predictor_weights.append(est_weight_result)
        feature_subset.append(
            feature_subset[feature_subset_m] + feature_subset[feature_subset_n])