Example #1
 def _get_gradient_hess(self, y, y_pred):
     """
     Compute the first- and second-order derivatives (gradient and Hessian)
     of the loss with respect to the raw predictions.
     :param y: ground-truth targets
     :param y_pred: raw (link-scale) predictions
     :return: (gradient, hessian)
     """
     if self.loss == 'squarederror':
         return y_pred - y, np.ones_like(y)
     elif self.loss == 'logistic':
         # log loss with labels y in {0, 1}:
         # grad = sigmoid(y_pred) - y, hess = sigmoid(y_pred) * (1 - sigmoid(y_pred))
         return utils.sigmoid(y_pred) - y, utils.sigmoid(y_pred) * (
             1 - utils.sigmoid(y_pred))
     elif self.loss == 'poisson':
         # Poisson deviance with log link: grad = exp(y_pred) - y, hess = exp(y_pred)
         return np.exp(y_pred) - y, np.exp(y_pred)
     elif self.loss == 'gamma':
         # gamma deviance with log link: grad = 1 - y * exp(-y_pred), hess = y * exp(-y_pred)
         return 1.0 - y * np.exp(-1.0 * y_pred), y * np.exp(-1.0 * y_pred)
     elif self.loss == 'tweedie':
         # Tweedie deviance with log link; p=1 reduces to Poisson, p=2 to gamma
         if self.p == 1:
             return np.exp(y_pred) - y, np.exp(y_pred)
         elif self.p == 2:
             return 1.0 - y * np.exp(-1.0 * y_pred), y * np.exp(
                 -1.0 * y_pred)
         else:
             return np.exp(y_pred * (2.0 - self.p)) - y * np.exp(
                 y_pred * (1.0 - self.p)), (2.0 - self.p) * np.exp(
                     y_pred * (2.0 - self.p)) - (1.0 - self.p) * y * np.exp(
                         y_pred * (1.0 - self.p))
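
As a sanity check, the analytic derivatives above can be compared against finite differences of the corresponding negative log-likelihood. The sketch below re-derives the 'poisson' branch in isolation; poisson_nll is an illustrative helper, not part of the original class.

import numpy as np

def poisson_nll(y, y_pred):
    # Poisson negative log-likelihood with log link, dropping the log(y!) term
    return np.exp(y_pred) - y * y_pred

y = np.array([0.0, 1.0, 3.0])
y_pred = np.array([0.2, -0.5, 1.1])
grad = np.exp(y_pred) - y  # matches the 'poisson' branch above
hess = np.exp(y_pred)

eps = 1e-5
num_grad = (poisson_nll(y, y_pred + eps) - poisson_nll(y, y_pred - eps)) / (2 * eps)
num_hess = (poisson_nll(y, y_pred + eps) - 2 * poisson_nll(y, y_pred)
            + poisson_nll(y, y_pred - eps)) / eps ** 2
assert np.allclose(grad, num_grad, atol=1e-6)
assert np.allclose(hess, num_hess, atol=1e-3)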
Example #2
    def predict(self, X):
        """
        :param X:
        :return:
        """
        # 归一化
        if self.normal:
            X = (X - self.xmin) / self.xmax
        # reshape
        X = X[:, self.replace_ind]
        # 去掉第一列bias以及非组合特征
        X_ = X[:, self.positive_ind:]
        n_sample, n_feature = X_.shape
        pol = np.zeros(n_sample)
        # pairwise FFM term: sum over i < j of x_i * x_j * <V[i, field(j)], V[j, field(i)]>
        for i in range(0, n_feature - 1):
            for j in range(i + 1, n_feature):
                pol += X_[:, i] * X_[:, j] * np.dot(self.V[i, self.fields[self.positive_ind + j]],
                                                    self.V[j, self.fields[self.positive_ind + i]])

        linear_rst = np.c_[np.ones(n_sample), X] @ self.w.reshape(-1) + pol
        if self.objective == "squarederror":
            return linear_rst
        elif self.objective in ["poisson", "gamma", "tweedie"]:
            return np.exp(linear_rst)
        else:
            return utils.sigmoid(linear_rst) > 0.5
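
For reference, the score this method evaluates is the standard field-aware factorization machine model; writing f(j) for the field id of feature j (self.fields[self.positive_ind + j] in the code):

\hat{y}(x) = w_0 + \sum_{i=1}^{n} w_i x_i + \sum_{i=1}^{n} \sum_{j=i+1}^{n} \langle v_{i, f(j)}, v_{j, f(i)} \rangle \, x_i x_j

which is exactly what the double loop above accumulates into pol.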
Example #3
 def predict_proba(self, X):
     """
     :param X:
     :return:
     """
     if self.normal:
         X = (X - self.xmin) / self.xmax
     n_sample, n_feature = X.shape
     # O(kn) evaluation of the FM pairwise term (see the identity below)
     X_V = X @ self.V
     X_V_2 = X_V * X_V
     X_2_V_2 = (X * X) @ (self.V * self.V)
     pol = 0.5 * np.sum(X_V_2 - X_2_V_2, axis=1)
     linear_rst = np.c_[np.ones(n_sample), X] @ self.w.reshape(-1) + pol
     pos_proba = utils.sigmoid(linear_rst)
     return np.c_[1.0 - pos_proba, pos_proba]
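
The X_V_2 - X_2_V_2 computation relies on the usual rewriting of the FM pairwise term, which brings the cost from O(kn^2) down to O(kn):

\sum_{i=1}^{n} \sum_{j=i+1}^{n} \langle v_i, v_j \rangle x_i x_j = \frac{1}{2} \sum_{f=1}^{k} \left[ \Big( \sum_{i=1}^{n} v_{i,f} x_i \Big)^2 - \sum_{i=1}^{n} v_{i,f}^2 x_i^2 \right]

Here X_V holds the inner sums \sum_i v_{i,f} x_i for every sample and every latent dimension f.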
Example #4
 def predict(self, X):
     """
     :param X:
     :return:
     """
     if self.normal:
         X = (X - self.xmin) / self.xmax
     n_sample, n_feature = X.shape
     # same O(kn) pairwise term as in predict_proba above
     X_V = X @ self.V
     X_V_2 = X_V * X_V
     X_2_V_2 = (X * X) @ (self.V * self.V)
     pol = 0.5 * np.sum(X_V_2 - X_2_V_2, axis=1)
     linear_rst = np.c_[np.ones(n_sample), X] @ self.w.reshape(-1) + pol
     if self.objective == "squarederror":
         return linear_rst
     elif self.objective in ["poisson", "gamma", "tweedie"]:
         return np.exp(linear_rst)
     else:
         return utils.sigmoid(linear_rst) > 0.5
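
A minimal usage sketch tying this predict to the fit shown in Example #6. The class name FM and its constructor arguments are assumptions for illustration; only the methods themselves appear in these examples.

import numpy as np

rng = np.random.default_rng(0)
X = rng.random((256, 8))
y = X @ rng.random(8) + 0.1 * rng.standard_normal(256)

# hypothetical class and constructor wrapping the fit/predict methods shown here
model = FM(objective="squarederror", solver="adam", epochs=5, batch_size=32)
train_losses, eval_losses = model.fit(X, y, eval_set=(X, y), show_log=False)
y_hat = model.predict(X)  # shape (256,)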
Example #5
 def predict_proba(self, X):
     """
     logistic regression用
     :param X:
     :return:
     """
     # 归一化
     if self.normal:
         X = (X - self.xmin) / self.xmax
     # reshape
     X = X[:, self.replace_ind]
     # 去掉第一列bias以及非组合特征
     X_ = X[:, self.positive_ind:]
     n_sample, n_feature = X_.shape
     pol = np.zeros(n_sample)
     # pairwise FFM term, as in predict above
     for i in range(0, n_feature - 1):
         for j in range(i + 1, n_feature):
             pol += X_[:, i] * X_[:, j] * np.dot(self.V[i, self.fields[self.positive_ind + j]],
                                                 self.V[j, self.fields[self.positive_ind + i]])
     pos_proba = utils.sigmoid(np.c_[np.ones(n_sample), X] @ self.w.reshape(-1) + pol)
     return np.c_[1.0 - pos_proba, pos_proba]
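
utils.sigmoid is referenced throughout but not shown in these examples. A numerically stable stand-in, consistent with how it is called here, might look as follows; this is an assumption about utils, not the original module.

import numpy as np

def sigmoid(x):
    # numerically stable logistic function: never exponentiates a large positive value
    x = np.asarray(x, dtype=float)
    out = np.empty_like(x)
    pos = x >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))
    ex = np.exp(x[~pos])
    out[~pos] = ex / (1.0 + ex)
    return out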
Example #6
    def fit(self, X, y, eval_set=None, show_log=True):
        X_o = X.copy()
        if self.normal:
            self.xmin = X.min(axis=0)
            self.xmax = X.max(axis=0) + 1e-8
            X = (X - self.xmin) / self.xmax
        n_sample, n_feature = X.shape
        x_y = np.c_[np.ones(n_sample), X, y]
        # track losses
        train_losses = []
        eval_losses = []
        # keep the learning rate at no less than 1 / n_feature
        if self.adjust_lr:
            self.lr = max(self.lr, 1 / n_feature)
        # initialize parameters with small random values
        self.w = np.random.random((n_feature + 1, 1)) * 1e-3
        self.V = np.random.random((n_feature, self.hidden_dim)) * 1e-3
        if self.solver == 'adam':
            # Adam caches for first- and second-moment estimates of the gradients
            w_1 = np.zeros_like(self.w)
            V_1 = np.zeros_like(self.V)
            w_2 = np.zeros_like(self.w)
            V_2 = np.zeros_like(self.V)
        # run the parameter updates
        count = 0
        for epoch in range(self.epochs):
            # per-epoch bookkeeping for early stopping on the eval set
            best_eval_value = np.power(2., 1023)
            eval_count = 0
            np.random.shuffle(x_y)
            for index in range(x_y.shape[0] // self.batch_size):
                count += 1
                batch_x_y = x_y[self.batch_size * index:self.batch_size *
                                (index + 1)]
                batch_x = batch_x_y[:, :-1]
                batch_y = batch_x_y[:, -1:]
                # outer factor of the chain rule: dL/d(raw score) per objective
                if self.objective == "squarederror":
                    y_x_t = self._y(batch_x).reshape((-1, 1)) - batch_y
                elif self.objective == "poisson":
                    y_x_t = np.exp(self._y(batch_x).reshape((-1, 1))) - batch_y
                elif self.objective == "gamma":
                    y_x_t = 1.0 - batch_y * np.exp(
                        -1.0 * self._y(batch_x).reshape((-1, 1)))
                elif self.objective == 'tweedie':
                    if self.tweedie_p == 1:
                        y_x_t = np.exp(self._y(batch_x).reshape(
                            (-1, 1))) - batch_y
                    elif self.tweedie_p == 2:
                        y_x_t = 1.0 - batch_y * np.exp(
                            -1.0 * self._y(batch_x).reshape((-1, 1)))
                    else:
                        y_x_t = np.exp(self._y(batch_x).reshape((-1, 1)) * (2.0 - self.tweedie_p)) \
                                - batch_y * np.exp(self._y(batch_x).reshape((-1, 1)) * (1.0 - self.tweedie_p))
                else:
                    # binary classification (logistic)
                    y_x_t = utils.sigmoid(self._y(batch_x).reshape(
                        (-1, 1))) - batch_y

                # update w: L2 (lamb) plus one-sided L1 (alpha) penalties; the bias w[0, 0] is left unregularized
                w_reg = self.lamb * self.w + self.alpha * np.where(
                    self.w > 0, 1, 0)
                w_reg[0, 0] = 0.0
                w_grad = (np.sum(y_x_t * batch_x, axis=0) /
                          self.batch_size).reshape((-1, 1)) + w_reg
                if self.solver == 'sgd':
                    self.w = self.w - self.lr * w_grad
                elif self.solver == 'adam':
                    w_1 = self.rho_1 * w_1 + (1 - self.rho_1) * w_grad
                    w_2 = self.rho_2 * w_2 + (1 - self.rho_2) * w_grad * w_grad
                    w_1_ = w_1 / (1 - np.power(self.rho_1, count))
                    w_2_ = w_2 / (1 - np.power(self.rho_2, count))
                    self.w = self.w - (self.lr * w_1_) / (np.sqrt(w_2_) + 1e-8)

                # update V
                batch_x_ = batch_x[:, 1:]
                V_X = batch_x_ @ self.V
                X_2 = batch_x_ * batch_x_
                # updating single (i, f) entries one at a time (kept for reference) is noticeably slower:
                # for i in range(self.V.shape[0]):
                #     for f in range(self.V.shape[1]):
                #         if self.solver == "sgd":
                #             self.V[i, f] -= self.lr * (
                #                 np.sum(y_x_t.reshape(-1) * (batch_x_[:, i] * V_X[:, f] - self.V[i, f] * X_2[:, i]))
                #                 / self.batch_size + self.lamb * self.V[i, f] + self.alpha * (self.V[i, f] > 0))
                #         elif self.solver == "adam":
                #             v_reg = self.lamb * self.V[i, f] + self.alpha * (self.V[i, f] > 0)
                #             v_grad = np.sum(y_x_t.reshape(-1) * (
                #                 batch_x_[:, i] * V_X[:, f] - self.V[i, f] * X_2[:, i])) / self.batch_size + v_reg
                #             V_1[i, f] = self.rho_1 * V_1[i, f] + (1 - self.rho_1) * v_grad
                #             V_2[i, f] = self.rho_2 * V_2[i, f] + (1 - self.rho_2) * v_grad * v_grad
                #             v_1_ = V_1[i, f] / (1 - np.power(self.rho_1, count))
                #             v_2_ = V_2[i, f] / (1 - np.power(self.rho_2, count))
                #             self.V[i, f] = self.V[i, f] - (self.lr * v_1_) / (np.sqrt(v_2_) + 1e-8)

                # vectorized update, one latent dimension (column of V) at a time
                for f in range(self.V.shape[1]):
                    V_reg = self.lamb * self.V[:, f] + self.alpha * (
                        self.V[:, f] > 0)
                    # average over the batch, matching the w update and the
                    # element-wise reference implementation above
                    V_grad = np.sum(y_x_t * (batch_x_ * V_X[:, f].reshape(
                        (-1, 1)) - X_2 * self.V[:, f]),
                                    axis=0) / self.batch_size + V_reg
                    if self.solver == 'sgd':
                        self.V[:, f] = self.V[:, f] - self.lr * V_grad
                    elif self.solver == 'adam':
                        V_1[:,
                            f] = self.rho_1 * V_1[:, f] + (1 -
                                                           self.rho_1) * V_grad
                        V_2[:, f] = self.rho_2 * V_2[:, f] + (
                            1 - self.rho_2) * V_grad * V_grad
                        V_1_ = V_1[:, f] / (1 - np.power(self.rho_1, count))
                        V_2_ = V_2[:, f] / (1 - np.power(self.rho_2, count))
                        self.V[:, f] = self.V[:, f] - (self.lr * V_1_) / (
                            np.sqrt(V_2_) + 1e-8)

                # compute the eval loss (residual standard deviation)
                eval_loss = None
                if eval_set is not None:
                    eval_x, eval_y = eval_set
                    eval_loss = np.std(eval_y - self.predict(eval_x))
                    eval_losses.append(eval_loss)
                # optionally log progress
                if show_log:
                    train_loss = np.std(y - self.predict(X_o))
                    print("epoch:", epoch + 1, "/", self.epochs, ",samples:",
                          (index + 1) * self.batch_size, "/", n_sample,
                          ",train loss:", train_loss, ",eval loss:", eval_loss)
                    train_losses.append(train_loss)
                # early stopping
                if eval_loss is not None and self.early_stopping_rounds is not None:
                    if eval_loss < best_eval_value:
                        eval_count = 0
                        best_eval_value = eval_loss
                    else:
                        eval_count += 1
                    if eval_count >= self.early_stopping_rounds:
                        print(
                            "---------------early_stopping-----------------------------"
                        )
                        break

        return train_losses, eval_losses
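
Both the w and the V updates above implement the standard Adam rule with bias correction; in the code's notation (rho_1 and rho_2 the moment decay rates, count the global step t, lr the learning rate \eta):

m_t = \rho_1 m_{t-1} + (1 - \rho_1) g_t, \qquad v_t = \rho_2 v_{t-1} + (1 - \rho_2) g_t^2

\hat{m}_t = \frac{m_t}{1 - \rho_1^t}, \qquad \hat{v}_t = \frac{v_t}{1 - \rho_2^t}, \qquad \theta_t = \theta_{t-1} - \frac{\eta \, \hat{m}_t}{\sqrt{\hat{v}_t} + 10^{-8}}

with (m, v) stored in (w_1, w_2) for the linear weights and in (V_1, V_2) for the latent factors.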
Example #7
    def fit(self, X, y, eval_set=None, show_log=False, fields=None):
        """
        :param X:
        :param y:
        :param eval_set:
        :param show_log:
        :param fields: 为None时,退化为FM
        :return:
        """
        X_o = X.copy()

        # normalization
        if self.normal:
            self.xmin = X.min(axis=0)
            self.xmax = X.max(axis=0) + 1e-7
            X = (X - self.xmin) / self.xmax

        n_sample, n_feature = X.shape
        # process fields: sort features by field id so that non-interaction
        # features (negative ids) come first
        if fields is None:
            self.replace_ind = list(range(0, n_feature))
            self.positive_ind = 0
            self.fields = [0] * n_feature
            self.field_num = 1
        else:
            self.replace_ind = np.argsort(fields).tolist()
            self.positive_ind = np.sum([1 if item < 0 else 0 for item in fields])
            self.fields = sorted(fields)
            self.field_num = len(set(self.fields[self.positive_ind:]))

        # reorder the columns of X accordingly
        X = X[:, self.replace_ind]

        x_y = np.c_[np.ones(n_sample), X, y]
        # track losses
        train_losses = []
        eval_losses = []
        # keep the learning rate at no less than 1 / n_feature
        if self.adjust_lr:
            self.lr = max(self.lr, 1 / n_feature)
        # initialize parameters with small random values
        self.w = np.random.random((n_feature + 1, 1)) * 1e-3
        self.V = np.random.random((n_feature - self.positive_ind, self.field_num, self.hidden_dim)) * 1e-3
        if self.solver == 'adam':
            # Adam caches for first- and second-moment estimates of the gradients
            w_1 = np.zeros_like(self.w)
            V_1 = np.zeros_like(self.V)
            w_2 = np.zeros_like(self.w)
            V_2 = np.zeros_like(self.V)
        # run the parameter updates
        count = 0
        for epoch in range(self.epochs):
            # per-epoch bookkeeping for early stopping on the eval set
            best_eval_value = np.power(2., 1023)
            eval_count = 0
            np.random.shuffle(x_y)
            for index in range(x_y.shape[0] // self.batch_size):
                count += 1
                batch_x_y = x_y[self.batch_size * index:self.batch_size * (index + 1)]
                batch_x = batch_x_y[:, :-1]
                batch_y = batch_x_y[:, -1:]
                # outer factor of the chain rule: dL/d(raw score) per objective
                if self.objective == "squarederror":
                    y_x_t = self._y(batch_x).reshape((-1, 1)) - batch_y
                elif self.objective == "poisson":
                    y_x_t = np.exp(self._y(batch_x).reshape((-1, 1))) - batch_y
                elif self.objective == "gamma":
                    y_x_t = 1.0 - batch_y * np.exp(-1.0 * self._y(batch_x).reshape((-1, 1)))
                elif self.objective == 'tweedie':
                    if self.tweedie_p == 1:
                        y_x_t = np.exp(self._y(batch_x).reshape((-1, 1))) - batch_y
                    elif self.tweedie_p == 2:
                        y_x_t = 1.0 - batch_y * np.exp(-1.0 * self._y(batch_x).reshape((-1, 1)))
                    else:
                        y_x_t = np.exp(self._y(batch_x).reshape((-1, 1)) * (2.0 - self.tweedie_p)) \
                                - batch_y * np.exp(self._y(batch_x).reshape((-1, 1)) * (1.0 - self.tweedie_p))
                else:
                    # binary classification (logistic)
                    y_x_t = utils.sigmoid(self._y(batch_x).reshape((-1, 1))) - batch_y

                # update w (L2 plus one-sided L1 regularization)
                if self.solver == 'sgd':
                    self.w = self.w - (self.lr * (np.sum(y_x_t * batch_x, axis=0) / self.batch_size).reshape(
                        (-1, 1)) + self.lamb * self.w + self.alpha * np.where(self.w > 0, 1, 0))
                elif self.solver == 'adam':
                    w_reg = self.lamb * self.w + self.alpha * np.where(self.w > 0, 1, 0)
                    w_grad = (np.sum(y_x_t * batch_x, axis=0) / self.batch_size).reshape(
                        (-1, 1)) + w_reg
                    w_1 = self.rho_1 * w_1 + (1 - self.rho_1) * w_grad
                    w_2 = self.rho_2 * w_2 + (1 - self.rho_2) * w_grad * w_grad
                    w_1_ = w_1 / (1 - np.power(self.rho_1, count))
                    w_2_ = w_2 / (1 - np.power(self.rho_2, count))
                    self.w = self.w - (self.lr * w_1_) / (np.sqrt(w_2_) + 1e-8)

                # update V
                batch_x_ = batch_x[:, 1 + self.positive_ind:]
                # element-wise update over every interacting pair (i, j) and latent index k
                for i in range(0, batch_x_.shape[1] - 1):
                    for j in range(i + 1, batch_x_.shape[1]):
                        for k in range(0, self.hidden_dim):
                            v_reg_l = self.lamb * self.V[i, self.fields[self.positive_ind + j], k] + \
                                      self.alpha * (self.V[i, self.fields[self.positive_ind + j], k] > 0)

                            v_grad_l = np.sum(y_x_t.reshape(-1) * batch_x_[:, i] * batch_x_[:, j] *
                                              self.V[
                                                  j, self.fields[self.positive_ind + i], k]) / self.batch_size + v_reg_l

                            v_reg_r = self.lamb * self.V[j, self.fields[self.positive_ind + i], k] + \
                                      self.alpha * (self.V[j, self.fields[self.positive_ind + i], k] > 0)

                            v_grad_r = np.sum(y_x_t.reshape(-1) * batch_x_[:, i] * batch_x_[:, j] *
                                              self.V[
                                                  i, self.fields[self.positive_ind + j], k]) / self.batch_size + v_reg_r

                            if self.solver == "sgd":
                                self.V[i, self.fields[self.positive_ind + j], k] -= self.lr * v_grad_l
                                self.V[j, self.fields[self.positive_ind + i], k] -= self.lr * v_grad_r
                            elif self.solver == "adam":
                                V_1[i, self.fields[self.positive_ind + j], k] = self.rho_1 * V_1[
                                    i, self.fields[self.positive_ind + j], k] + (1 - self.rho_1) * v_grad_l
                                V_2[i, self.fields[self.positive_ind + j], k] = self.rho_2 * V_2[
                                    i, self.fields[self.positive_ind + j], k] + (1 - self.rho_2) * v_grad_l * v_grad_l
                                v_1_l = V_1[i, self.fields[self.positive_ind + j], k] / (
                                    1 - np.power(self.rho_1, count))
                                v_2_l = V_2[i, self.fields[self.positive_ind + j], k] / (
                                    1 - np.power(self.rho_2, count))

                                V_1[j, self.fields[self.positive_ind + i], k] = self.rho_1 * V_1[
                                    j, self.fields[self.positive_ind + i], k] + (1 - self.rho_1) * v_grad_r
                                V_2[j, self.fields[self.positive_ind + i], k] = self.rho_2 * V_2[
                                    j, self.fields[self.positive_ind + i], k] + (1 - self.rho_2) * v_grad_r * v_grad_r
                                v_1_r = V_1[j, self.fields[self.positive_ind + i], k] / (
                                    1 - np.power(self.rho_1, count))
                                v_2_r = V_2[j, self.fields[self.positive_ind + i], k] / (
                                    1 - np.power(self.rho_2, count))

                                self.V[i, self.fields[self.positive_ind + j], k] -= (self.lr * v_1_l) / (
                                    np.sqrt(v_2_l) + 1e-8)

                                self.V[j, self.fields[self.positive_ind + i], k] -= (self.lr * v_1_r) / (
                                    np.sqrt(v_2_r) + 1e-8)

                # compute the eval loss (error rate for logistic, residual std otherwise)
                eval_loss = None
                if eval_set is not None:
                    eval_x, eval_y = eval_set
                    if self.objective == 'logistic':
                        eval_loss = np.mean(eval_y != self.predict(eval_x))
                    else:
                        eval_loss = np.std(eval_y - self.predict(eval_x))
                    eval_losses.append(eval_loss)
                # optionally log progress
                if show_log:
                    if self.objective == 'logistic':
                        train_loss = np.mean(y != self.predict(X_o))
                    else:
                        train_loss = np.std(y - self.predict(X_o))
                    print("epoch:", epoch + 1, "/", self.epochs, ",samples:", (index + 1) * self.batch_size, "/",
                          n_sample,
                          ",train loss:",
                          train_loss, ",eval loss:", eval_loss)
                    train_losses.append(train_loss)
                # early stopping
                if eval_loss is not None and self.early_stopping_rounds is not None:
                    if eval_loss < best_eval_value:
                        eval_count = 0
                        best_eval_value = eval_loss
                    else:
                        eval_count += 1
                    if eval_count >= self.early_stopping_rounds:
                        print("---------------early_stopping-----------------------------")
                        break

        return train_losses, eval_losses
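
A minimal usage sketch for the field-aware variant, assuming the methods above belong to a class named FFM; the class name and constructor arguments are assumptions, since only the methods are shown in these examples. A negative field id keeps a feature out of the pairwise interactions:

import numpy as np

rng = np.random.default_rng(0)
X = rng.random((512, 6))
y = (X[:, 0] + X[:, 1] * X[:, 2] > 1.0).astype(float)

# hypothetical constructor; objective/solver names follow the code above
model = FFM(objective="logistic", solver="adam", epochs=5, batch_size=64)

# features 0-1 in field 0, features 2-3 in field 1,
# features 4-5 excluded from interactions (negative field id)
fields = [0, 0, 1, 1, -1, -1]
train_losses, eval_losses = model.fit(X, y, eval_set=(X, y), fields=fields)
proba = model.predict_proba(X)  # columns: [P(y=0), P(y=1)]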