    def fit(self, x, y):
        # one-hot encode y
        class_num = np.amax(y) + 1
        y_cate = np.zeros(shape=(len(y), class_num))
        y_cate[np.arange(len(y)), y] = 1

        # one copy of the base estimator chain per class
        self.expand_base_estimators = [
            copy.deepcopy(self.base_estimator) for _ in range(class_num)
        ]

        # fit the first model
        y_pred_score_ = []
        # TODO: parallelize across classes
        for class_index in range(0, class_num):
            self.expand_base_estimators[class_index][0].fit(
                x, y_cate[:, class_index])
            y_pred_score_.append(
                self.expand_base_estimators[class_index][0].predict(x))
        y_pred_score_ = np.c_[y_pred_score_].T
        # negative gradient of the softmax cross-entropy loss: y - softmax(score)
        new_y = y_cate - utils.softmax(y_pred_score_)
        # train the remaining rounds on the negative gradient
        for index in range(1, self.n_estimators):
            y_pred_score = []
            for class_index in range(0, class_num):
                self.expand_base_estimators[class_index][index].fit(
                    x, new_y[:, class_index])
                y_pred_score.append(
                    self.expand_base_estimators[class_index][index].predict(x))
            y_pred_score_ += np.c_[y_pred_score].T * self.learning_rate
            new_y = y_cate - utils.softmax(y_pred_score_)
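The negative-gradient formula used above can be checked in isolation. Below is a small self-contained sketch (the _softmax helper is a stand-in for utils.softmax, not part of the original code): for softmax cross-entropy, the negative gradient with respect to the raw score of class k is y_k - p_k.

import numpy as np

def _softmax(scores):
    # row-wise softmax with max-subtraction for numerical stability
    e = np.exp(scores - scores.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

y_cate = np.array([[1.0, 0.0, 0.0]])    # one-hot target
scores = np.array([[0.2, -0.1, 0.05]])  # current raw scores
neg_grad = y_cate - _softmax(scores)    # positive for the true class, negative elsewhere
print(neg_grad)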
Example #2
    def fit(self, x):
        n_sample = x.shape[0]
        self.dbw.fit(x)
        x_bins = self.dbw.transform(x)
        # initialize model parameters
        self.default_y_prob = np.log(0.5 / self.n_components)  # fallback log p_y
        for y_label in range(0, self.n_components):
            self.p_x_y[y_label] = {}
            self.p_y[y_label] = np.log(1.0 / self.n_components)  # uniform initial p_y
            self.default_x_prob[y_label] = np.log(0.5 / n_sample)  # fallback log p_x_y
            # initialize p_x_y the same way for every component
            for j in range(0, x_bins.shape[1]):
                self.p_x_y[y_label][j] = {}
                x_j_set = set(x_bins[:, j])
                for x_j in x_j_set:
                    # estimate p(x_j | y) from a random subsample, with Laplace smoothing
                    sample_x_index = np.random.choice(
                        n_sample, n_sample // self.n_components)
                    sample_x_bins = x_bins[sample_x_index]
                    p_x_y = (np.sum(sample_x_bins[:, j] == x_j) +
                             1) / (sample_x_bins.shape[0] + len(x_j_set))
                    self.p_x_y[y_label][j][x_j] = np.log(p_x_y)
        # compute the latent responsibilities (E-step)
        W_log = self.get_log_w(x_bins)
        W = utils.softmax(W_log)
        W_gen = np.exp(W_log)
        current_log_loss = np.log(W_gen.sum(axis=1)).sum()
        # iterative EM training
        current_epoch = 0
        for _ in range(0, self.n_iter):
            if self.verbose:
                utils.plot_decision_function(x, self.predict(x), self)
                utils.plt.pause(0.1)
                utils.plt.clf()
            # M-step: update the model parameters
            for k in range(0, self.n_components):
                self.p_y[k] = np.log(np.sum(W[:, k]) / n_sample)
                for j in range(0, x_bins.shape[1]):
                    x_j_set = set(x_bins[:, j])
                    for x_j in x_j_set:
                        self.p_x_y[k][j][x_j] = np.log(
                            1e-10 +
                            np.sum(W[:, k] *
                                   (x_bins[:, j] == x_j)) / np.sum(W[:, k]))

            # E-step: update the responsibilities
            W_log = self.get_log_w(x_bins)
            W = utils.softmax(W_log)
            W_gen = np.exp(W_log)
            # compute the log-likelihood
            new_log_loss = np.log(W_gen.sum(axis=1)).sum()
            if new_log_loss - current_log_loss > self.tol:
                current_log_loss = new_log_loss
                current_epoch += 1
            else:
                print('total epochs:', current_epoch)
                break
        if self.verbose:
            utils.plot_decision_function(x, self.predict(x), self)
            utils.plt.show()
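The E-step and the stopping statistic above can be reproduced in isolation. The sketch below assumes, as get_log_w appears to return, that W_log[i, k] holds the joint log-probability log p(x_i, z=k); the responsibilities are then a row-wise softmax of W_log, and the log-likelihood sums log p(x_i) over samples.

import numpy as np

W_log = np.log(np.array([[0.30, 0.10],    # assumed joint probabilities: two samples,
                         [0.05, 0.40]]))  # two components
# responsibilities: normalize the joint over components (row-wise softmax of W_log)
W = np.exp(W_log - W_log.max(axis=1, keepdims=True))
W /= W.sum(axis=1, keepdims=True)
# log-likelihood: sum over samples of log sum_k p(x_i, z=k)
log_likelihood = np.log(np.exp(W_log).sum(axis=1)).sum()
print(W)
print(log_likelihood)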
Example #3
 def predict_proba(self, x):
     # TODO: parallelize across classes
     y_pred_score = []
     for class_index in range(0, len(self.expand_base_estimators)):
         estimator_of_index = self.expand_base_estimators[class_index]
         # the first round enters at full weight; every later round is shrunk
         # by the learning rate, mirroring fit()
         y_pred_score.append(
             np.sum([estimator_of_index[0].predict(x)] + [
                 self.learning_rate * estimator_of_index[i].predict(x)
                 for i in range(1, self.n_estimators)
             ],
                    axis=0))
     return utils.softmax(np.c_[y_pred_score].T)
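A quick numeric check of that accumulation rule on made-up per-round predictions (hypothetical numbers, one class): the raw score is the first round's prediction plus learning_rate times each later round's prediction.

import numpy as np

preds = [np.array([0.5, -0.2]),   # round 0, full weight
         np.array([0.1, 0.3]),    # later rounds are shrunk
         np.array([-0.4, 0.2])]
learning_rate = 0.1
score = preds[0] + learning_rate * np.sum(preds[1:], axis=0)
print(score)  # [ 0.47 -0.15]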
Example #4
    def fit(self, x, y):
        # one-hot encode y
        class_num = np.amax(y) + 1
        y_cate = np.zeros(shape=(len(y), class_num))
        y_cate[np.arange(len(y)), y] = 1

        # one copy of the base estimator chain per class
        self.expand_base_estimators = [
            copy.deepcopy(self.base_estimator) for _ in range(class_num)
        ]

        # the initial model is taken to predict 0 for every class
        y_pred_score_ = np.zeros(shape=(x.shape[0], class_num))
        # first- and second-order derivatives of the softmax cross-entropy loss
        g = utils.softmax(y_pred_score_) - y_cate
        h = utils.softmax(y_pred_score_) * (1 - utils.softmax(y_pred_score_))
        # train the boosting rounds
        for index in range(0, self.n_estimators):
            y_pred_score = []
            for class_index in range(0, class_num):
                self.expand_base_estimators[class_index][index].fit(
                    x, g[:, class_index], h[:, class_index])
                y_pred_score.append(
                    self.expand_base_estimators[class_index][index].predict(x))
            y_pred_score_ += np.c_[y_pred_score].T * self.learning_rate
            g = utils.softmax(y_pred_score_) - y_cate
            h = utils.softmax(y_pred_score_) * (1 -
                                                utils.softmax(y_pred_score_))
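Here the base trees are fitted with the per-sample gradients g and hessians h directly. For reference, a common second-order (XGBoost-style) leaf weight such a tree might use, sketched under the assumption of an L2 regularizer lambda, is w* = -sum(g) / (sum(h) + lambda):

import numpy as np

g = np.array([0.3, -0.7, 0.2])    # first-order gradients of the samples in one leaf
h = np.array([0.21, 0.21, 0.16])  # second-order gradients of the same samples
reg_lambda = 1.0                  # hypothetical L2 regularization strength
w_star = -g.sum() / (h.sum() + reg_lambda)
print(w_star)  # ~0.127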
Example #5
    def fit(self, x, y):
        # one-hot encode y
        class_num = np.amax(y) + 1
        self.class_num = class_num
        y_cate = np.zeros(shape=(len(y), class_num))
        y_cate[np.arange(len(y)), y] = 1

        self.weights = [[] for _ in range(0, class_num)]

        # one copy of the base estimator chain per class
        self.expand_base_estimators = [
            copy.deepcopy(self.base_estimator) for _ in range(class_num)
        ]

        # fit the first model
        y_pred_score_ = [[] for _ in range(0, self.class_num)]
        # TODO: parallelize across classes
        for class_index in range(0, class_num):
            self.expand_base_estimators[class_index][0].fit(
                x, y_cate[:, class_index])
            y_pred_score_[class_index].append(
                self.expand_base_estimators[class_index][0].predict(x))
            self.weights[class_index].append(1.0)
        y_pred_result, ks = self._dropout(y_pred_score_)
        y_pred_result = np.c_[y_pred_result].T
        # compute the negative gradient
        new_y = y_cate - utils.softmax(y_pred_result)
        # train the remaining rounds
        for index in range(1, self.n_estimators):
            for class_index in range(0, class_num):
                self.expand_base_estimators[class_index][index].fit(
                    x, new_y[:, class_index])
                y_pred_score_[class_index].append(
                    self.expand_base_estimators[class_index][index].predict(x))
                self.weights[class_index].append(1.0 / (ks[class_index] + 1))
            y_pred_result, ks = self._dropout(y_pred_score_)
            y_pred_result = np.c_[y_pred_result].T
            new_y = y_cate - utils.softmax(y_pred_result)
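The _dropout helper is not shown in this listing. The weights of 1.0 / (k + 1) suggest DART-style dropout; the sketch below illustrates that bookkeeping under the assumption that _dropout drops a random subset of the rounds trained so far for each class and returns the summed score of the kept rounds together with the drop count k (the next tree then enters with weight 1/(k+1)):

import numpy as np

rng = np.random.default_rng(0)
round_scores = [np.array([0.5, -0.2]),
                np.array([0.1, 0.3]),
                np.array([-0.4, 0.2])]
drop_mask = rng.random(len(round_scores)) < 0.5  # hypothetical drop rate of 0.5
kept = [s for s, dropped in zip(round_scores, drop_mask) if not dropped]
k = int(drop_mask.sum())
score = np.sum(kept, axis=0) if kept else np.zeros_like(round_scores[0])
print(score, 'next-round weight:', 1.0 / (k + 1))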
Example #6
 def predict_proba(self, x):
     rst = []
     for x_row in x:
         tmp = []
         for y_index in range(0, self.n_components):
             try:
                 p_y_log = self.p_y[y_index]
             except (KeyError, IndexError):
                 p_y_log = self.default_y_prob
             for i, xij in enumerate(x_row):
                 p_y_log += np.log(
                     1e-12 +
                     utils.gaussian_1d(xij, self.p_x_y[y_index][i][0],
                                       self.p_x_y[y_index][i][1]))
             tmp.append(p_y_log)
         rst.append(tmp)
     return utils.softmax(np.asarray(rst))
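utils.gaussian_1d is not shown in this listing. A minimal stand-in follows, assuming p_x_y[y_index][i] stores the (mean, variance) of feature i under component y_index; the second slot may equally be a standard deviation in the original utils.

import numpy as np

def gaussian_1d(x, mean, var):
    # density of a one-dimensional normal distribution at x
    return np.exp(-(x - mean) ** 2 / (2.0 * var)) / np.sqrt(2.0 * np.pi * var)

print(gaussian_1d(0.0, 0.0, 1.0))  # peak of the standard normal, ~0.3989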
Example #7
 def predict_proba(self, x):
     x_bins = self.dbw.transform(x)
     rst = []
     for x_row in x_bins:
         tmp = []
         for y_index in range(0, self.n_components):
             try:
                 p_y_log = self.p_y[y_index]
             except (KeyError, IndexError):
                 p_y_log = self.default_y_prob
             for i, xij in enumerate(x_row):
                 try:
                     p_y_log += self.p_x_y[y_index][i][xij]
                 except (KeyError, IndexError):
                     p_y_log += self.default_x_prob[y_index]
             tmp.append(p_y_log)
         rst.append(tmp)
     return utils.softmax(np.asarray(rst))
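Hard component assignments then follow from these posteriors in the usual way; a hypothetical usage sketch on made-up output:

import numpy as np

proba = np.array([[0.7, 0.2, 0.1],   # e.g. the output of predict_proba above
                  [0.1, 0.3, 0.6]])
y_pred = np.argmax(proba, axis=1)    # most probable component per sample
print(y_pred)  # [0 2]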