def fit(self, x, y):
    # one-hot encode y
    class_num = np.amax(y) + 1
    y_cate = np.zeros(shape=(len(y), class_num))
    y_cate[np.arange(len(y)), y] = 1
    # expand the base estimators: one boosting chain per class
    self.expand_base_estimators = [
        copy.deepcopy(self.base_estimator) for _ in range(class_num)
    ]
    # fit the first round of models
    y_pred_score_ = []
    # TODO: parallelize
    for class_index in range(0, class_num):
        self.expand_base_estimators[class_index][0].fit(
            x, y_cate[:, class_index])
        y_pred_score_.append(
            self.expand_base_estimators[class_index][0].predict(x))
    y_pred_score_ = np.c_[y_pred_score_].T
    # compute the negative gradient
    new_y = y_cate - utils.softmax(y_pred_score_)
    # train the remaining rounds
    for index in range(1, self.n_estimators):
        y_pred_score = []
        for class_index in range(0, class_num):
            self.expand_base_estimators[class_index][index].fit(
                x, new_y[:, class_index])
            y_pred_score.append(
                self.expand_base_estimators[class_index][index].predict(x))
        y_pred_score_ += np.c_[y_pred_score].T * self.learning_rate
        new_y = y_cate - utils.softmax(y_pred_score_)
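# Why new_y = y_cate - utils.softmax(y_pred_score_) is the negative gradient:
# for the multi-class cross-entropy L = -sum_k y_k * log(softmax(f)_k), the
# derivative dL/df_k = softmax(f)_k - y_k, so each round fits the residual
# y_k - softmax(f)_k. A minimal numeric check of that identity (standalone
# sketch, assuming a plain numpy softmax in place of utils.softmax):
import numpy as np

def _softmax(scores):
    e = np.exp(scores - scores.max(axis=1, keepdims=True))  # stabilized
    return e / e.sum(axis=1, keepdims=True)

def _cross_entropy(scores, y_onehot):
    return -np.sum(y_onehot * np.log(_softmax(scores)))

scores = np.array([[2.0, 0.5, -1.0]])
y_onehot = np.array([[1.0, 0.0, 0.0]])
eps = 1e-6
bumped = scores.copy()
bumped[0, 0] += eps
numeric_grad = (_cross_entropy(bumped, y_onehot) -
                _cross_entropy(scores, y_onehot)) / eps
analytic_grad = (_softmax(scores) - y_onehot)[0, 0]
assert abs(numeric_grad - analytic_grad) < 1e-4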
def fit(self, x):
    n_sample = x.shape[0]
    self.dbw.fit(x)
    x_bins = self.dbw.transform(x)
    # initialize the model parameters
    self.default_y_prob = np.log(0.5 / self.n_components)  # default p_y
    for y_label in range(0, self.n_components):
        self.p_x_y[y_label] = {}
        self.p_y[y_label] = np.log(1.0 / self.n_components)  # identical initial p_y
        self.default_x_prob[y_label] = np.log(0.5 / n_sample)  # default p_x_y
        # initialize p_x_y per component from a random subsample
        # (the randomness breaks the symmetry between components)
        for j in range(0, x_bins.shape[1]):
            self.p_x_y[y_label][j] = {}
            x_j_set = set(x_bins[:, j])
            for x_j in x_j_set:
                sample_x_index = np.random.choice(
                    n_sample, n_sample // self.n_components)
                sample_x_bins = x_bins[sample_x_index]
                # conditional probability with Laplace smoothing
                p_x_y = (np.sum(sample_x_bins[:, j] == x_j) + 1) / (
                    sample_x_bins.shape[0] + len(x_j_set))
                self.p_x_y[y_label][j][x_j] = np.log(p_x_y)
    # compute the latent variables (responsibilities)
    W_log = self.get_log_w(x_bins)
    W = utils.softmax(W_log)
    W_gen = np.exp(W_log)
    current_log_loss = np.log(W_gen.sum(axis=1)).sum()
    # iterate EM
    current_epoch = 0
    for _ in range(0, self.n_iter):
        if self.verbose:
            utils.plot_decision_function(x, self.predict(x), self)
            utils.plt.pause(0.1)
            utils.plt.clf()
        # M step: update the model parameters
        for k in range(0, self.n_components):
            self.p_y[k] = np.log(np.sum(W[:, k]) / n_sample)
            for j in range(0, x_bins.shape[1]):
                x_j_set = set(x_bins[:, j])
                for x_j in x_j_set:
                    self.p_x_y[k][j][x_j] = np.log(
                        1e-10 + np.sum(W[:, k] * (x_bins[:, j] == x_j)) /
                        np.sum(W[:, k]))
        # E step: update the latent variables
        W_log = self.get_log_w(x_bins)
        W = utils.softmax(W_log)
        W_gen = np.exp(W_log)
        # compute the log-likelihood
        new_log_loss = np.log(W_gen.sum(axis=1)).sum()
        if new_log_loss - current_log_loss > self.tol:
            current_log_loss = new_log_loss
            current_epoch += 1
        else:
            print('total epochs:', current_epoch)
            break
    if self.verbose:
        utils.plot_decision_function(x, self.predict(x), self)
        utils.plt.show()
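# E-step bookkeeping in the fit above: each row of W_log holds
# log p(y=k) + sum_j log p(x_j | y=k) for every component k, so a row-wise
# softmax turns the joint log-probabilities into responsibilities, and the
# row sums of exp(W_log) are the per-sample marginal likelihoods. Minimal
# sketch with a hand-rolled softmax (assumed to match utils.softmax):
import numpy as np

def _softmax(a):
    e = np.exp(a - a.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

W_log = np.log(np.array([[0.10, 0.30],   # joint p(x, y=k), one sample per row
                         [0.20, 0.05]]))
W = _softmax(W_log)                       # responsibilities p(y=k | x)
log_like = np.log(np.exp(W_log).sum(axis=1)).sum()  # sum_i log p(x_i)
assert np.allclose(W.sum(axis=1), 1.0)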
def predict_proba(self, x):
    # TODO: parallelize
    y_pred_score = []
    for class_index in range(0, len(self.expand_base_estimators)):
        estimator_of_index = self.expand_base_estimators[class_index]
        # the first model contributes its raw score; every later model is
        # shrunk by the learning rate, mirroring the accumulation in fit
        # (the previous version added the last estimator unscaled, which
        # did not match fit)
        y_pred_score.append(
            np.sum([estimator_of_index[0].predict(x)] + [
                self.learning_rate * estimator_of_index[i].predict(x)
                for i in range(1, self.n_estimators)
            ], axis=0))
    return utils.softmax(np.c_[y_pred_score].T)
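# A hard-label predict built on top of predict_proba above (sketch: argmax
# over class probabilities; not necessarily the repo's own implementation):
def predict(self, x):
    return np.argmax(self.predict_proba(x), axis=1)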
def fit(self, x, y):
    # one-hot encode y
    class_num = np.amax(y) + 1
    y_cate = np.zeros(shape=(len(y), class_num))
    y_cate[np.arange(len(y)), y] = 1
    # expand the base estimators: one boosting chain per class
    self.expand_base_estimators = [
        copy.deepcopy(self.base_estimator) for _ in range(class_num)
    ]
    # the initial prediction is assumed to be 0
    y_pred_score_ = np.zeros(shape=(x.shape[0], class_num))
    # first- and second-order derivatives of the softmax cross-entropy
    p = utils.softmax(y_pred_score_)
    g = p - y_cate
    h = p * (1 - p)
    # train each round of models
    for index in range(0, self.n_estimators):
        y_pred_score = []
        for class_index in range(0, class_num):
            self.expand_base_estimators[class_index][index].fit(
                x, g[:, class_index], h[:, class_index])
            y_pred_score.append(
                self.expand_base_estimators[class_index][index].predict(x))
        y_pred_score_ += np.c_[y_pred_score].T * self.learning_rate
        p = utils.softmax(y_pred_score_)
        g = p - y_cate
        h = p * (1 - p)
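# What the (g, h) pair buys over plain gradients: with first- and
# second-order derivatives of the softmax cross-entropy, the optimal output
# of a tree leaf is the Newton step -G / (H + lambda) over the samples in
# that leaf. Tiny sketch (the leaf grouping and the L2 term lambda are
# hypothetical; the real logic lives inside base_estimator.fit(x, g, h)):
import numpy as np

g_leaf = np.array([0.3, -0.2, 0.1])  # first-order terms of samples in one leaf
h_leaf = np.array([0.2, 0.25, 0.2])  # second-order terms of the same samples
lam = 1.0                            # assumed L2 regularization strength
leaf_weight = -g_leaf.sum() / (h_leaf.sum() + lam)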
def fit(self, x, y):
    # one-hot encode y
    class_num = np.amax(y) + 1
    self.class_num = class_num
    y_cate = np.zeros(shape=(len(y), class_num))
    y_cate[np.arange(len(y)), y] = 1
    self.weights = [[] for _ in range(0, class_num)]
    # expand the base estimators: one boosting chain per class
    self.expand_base_estimators = [
        copy.deepcopy(self.base_estimator) for _ in range(class_num)
    ]
    # fit the first round of models
    y_pred_score_ = [[] for _ in range(0, self.class_num)]
    # TODO: parallelize
    for class_index in range(0, class_num):
        self.expand_base_estimators[class_index][0].fit(
            x, y_cate[:, class_index])
        y_pred_score_[class_index].append(
            self.expand_base_estimators[class_index][0].predict(x))
        self.weights[class_index].append(1.0)
    y_pred_result, ks = self._dropout(y_pred_score_)
    y_pred_result = np.c_[y_pred_result].T
    # compute the negative gradient
    new_y = y_cate - utils.softmax(y_pred_result)
    # train the remaining rounds
    for index in range(1, self.n_estimators):
        for class_index in range(0, class_num):
            self.expand_base_estimators[class_index][index].fit(
                x, new_y[:, class_index])
            y_pred_score_[class_index].append(
                self.expand_base_estimators[class_index][index].predict(x))
            # a tree trained after k drops enters with weight 1 / (k + 1)
            self.weights[class_index].append(1.0 / (ks[class_index] + 1))
        y_pred_result, ks = self._dropout(y_pred_score_)
        y_pred_result = np.c_[y_pred_result].T
        new_y = y_cate - utils.softmax(y_pred_result)
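# A sketch of the _dropout contract used above (hypothetical implementation,
# only the interface is implied by fit): for each class, drop a random subset
# of the trees trained so far, return the weighted sum of the surviving
# predictions plus the count k of dropped trees, so the next tree can be
# added with weight 1 / (k + 1) in the DART style.
import numpy as np

def _dropout_sketch(y_pred_score_, weights, drop_rate=0.5):
    results, ks = [], []
    for preds, ws in zip(y_pred_score_, weights):
        keep = np.random.rand(len(preds)) >= drop_rate
        keep[0] = True  # always keep at least one tree (assumption)
        results.append(
            np.sum([w * p for w, p, m in zip(ws, preds, keep) if m], axis=0))
        ks.append(int((~keep).sum()))
    return results, ks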
def predict_proba(self, x):
    rst = []
    for x_row in x:
        tmp = []
        for y_index in range(0, self.n_components):
            try:
                p_y_log = self.p_y[y_index]
            except (KeyError, IndexError):
                p_y_log = self.default_y_prob
            for i, xij in enumerate(x_row):
                # p_x_y[y][i] holds the (mean, std) of feature i for component y
                p_y_log += np.log(
                    1e-12 + utils.gaussian_1d(xij, self.p_x_y[y_index][i][0],
                                              self.p_x_y[y_index][i][1]))
            tmp.append(p_y_log)
        rst.append(tmp)
    return utils.softmax(np.asarray(rst))
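# utils.gaussian_1d above evaluates a univariate normal density; a minimal
# stand-in (assuming p_x_y[y][i] stores a (mean, std) pair, with std a
# standard deviation rather than a variance):
import numpy as np

def gaussian_1d(x, mean, std):
    return np.exp(-0.5 * ((x - mean) / std) ** 2) / (np.sqrt(2 * np.pi) * std)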
def predict_proba(self, x):
    x_bins = self.dbw.transform(x)
    rst = []
    for x_row in x_bins:
        tmp = []
        for y_index in range(0, self.n_components):
            try:
                p_y_log = self.p_y[y_index]
            except (KeyError, IndexError):
                p_y_log = self.default_y_prob
            for i, xij in enumerate(x_row):
                try:
                    p_y_log += self.p_x_y[y_index][i][xij]
                except KeyError:
                    # fall back for bins never seen during training
                    p_y_log += self.default_x_prob[y_index]
            tmp.append(p_y_log)
        rst.append(tmp)
    return utils.softmax(np.asarray(rst))
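# End-to-end usage sketch for the discretized EM clustering model above
# (class name and constructor arguments are assumptions):
#   model = NaiveBayesCluster(n_components=3, n_iter=100, tol=1e-5)
#   model.fit(x)
#   labels = model.predict_proba(x).argmax(axis=1)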