예제 #1
0
    def _e_step(self, users, items, user_feat, target, pie, rate):
        """E-step of the EM algorithm: estimates the response values w_ijt.

        Entries whose target is non-zero keep a response of exactly 1. Entries whose target is zero get
        pie * P(target | rate) / (pie * P(target | rate) + 1 - pie), where P is the Poisson probability obtained from
        objectives.pois_log_prob.

         Args
        ------
            1. users:       <(D, ) int>      user ids (not used by this method)
            2. items:       <(D, ) int>      item ids (not used by this method)
            3. user_feat:   <(D, f) float>   user features values (only the number of rows is used)
            4. target:      <(D, ) int>      target rates
            5. pie:         <(D, ) float>    estimated mixing weights
            6. rate:        <(D, ) float>    estimated rate parameter

         Returns
        ---------
            1. w_ijt:    <(D, ) float>   estimated response values.
        """
        timer = tm.get_point('_e_step')

        zero_idx = np.where(target == 0)[0]
        pie_at_zero = pie[zero_idx]

        # Mixing weight times the Poisson probability, restricted to the zero-target entries.
        rate_mass = pie_at_zero * np.exp(
            objectives.pois_log_prob(target, rate))[zero_idx]

        # A non-zero target has to come from the rate process, so its response stays at 1; only the zero-target
        # entries are re-estimated.
        responses = np.ones(user_feat.shape[0])
        responses[zero_idx] = rate_mass / (rate_mass + 1 - pie_at_zero)

        timer.collect()

        return responses
예제 #2
0
    def eta_likelihood(self, users, items, user_feat, w_ijt):
        """Computes the mean logistic log-likelihood conditioned on eta.

        The sigmoid values returned by self.sigmoid_func are pushed strictly inside (0, 1) before taking logs so that
        neither log(sig) nor log(1 - sig) evaluates to -inf.

         Args
        ------
            1. users:       <(D, ) int>     user ids
            2. items:       <(D, ) int>     item ids
            3. user_feat:   <(D, f) float>  user features values
            4. w_ijt:       <(D, ) int>     target response values

         Returns
        ---------
            1. ll:       <float>  mean of w * log(sig) + (1 - w) * log(1 - sig) over all points
        """
        point = tm.get_point('eta_logistic_likelihood')
        sig = np.asarray(self.sigmoid_func(users, items, user_feat),
                         dtype=float)

        # For robustness making sure no value is exactly 1 or exactly 0.
        # The upper bound must be the largest double below 1: 1.0 - 1E-24 rounds back to 1.0 in double precision,
        # which would leave log(1 - sig) at -inf (and 0 * -inf = nan when w_ijt is 1).
        sig[sig >= 1] = np.nextafter(1.0, 0.0)
        sig[sig <= 0] = 1E-24

        ll = np.mean(w_ijt * np.log(sig) + (1 - w_ijt) * (np.log(1 - sig)))

        point.collect()

        return ll
예제 #3
0
    def get_est_lambda(self, users, items, features):
        """Estimates the lambda (rate) parameters from the current beta values.

        For every (user, item) pair in the users and items vectors the linear term is computed from the matching
        feature row by gd_commons.mul_feat_coeff, and the estimated lambda is its exponent.

         Args
        ------
            1. users:       <(N, ) ndarray of type int>     user ids
            2. items:       <(N, ) ndarray of type int>     item ids
            3. features:    <(N, d) ndarray of type float>  features values.

         Returns
        ---------
            1. est_lamb:    <(N, ) ndarray of type float>   estimated lambdas.

         Raises
        --------
            1. AssertionError if users, items and features do not have the same number of rows.
        """
        num_rows = users.shape[0]
        if not (num_rows == items.shape[0] and num_rows == features.shape[0]):
            raise AssertionError(
                'Numbers of users, items and features have to be the same.')

        timer = tm.get_point('get_est_lambda')
        linear_term = gd_commons.mul_feat_coeff(
            users, items, features,
            self.beta_0, self.beta_u, self.beta_i,
            num_proc=self.num_proc)
        est_lamb = np.exp(linear_term)
        timer.collect()

        return est_lamb
예제 #4
0
    def _pois_reg_data_log_like(self,
                                target,
                                users,
                                items,
                                user_feat,
                                weights=None):
        """Computes the average (optionally weighted) data log likelihood.

        The per-point log probabilities come from objectives.pois_log_prob evaluated at the rates returned by
        get_est_lambda. When weights are given, each point's value is multiplied by its weight before averaging.

         Args
        ------
            1. target:      <(D, ) int>     target rates
            2. users:       <(D, ) int>     user ids
            3. items:       <(D, ) int>     item ids
            4. user_feat:   <(D, f) float>  data-driven (non-intercept) and user const features
            5. weights:     <(D, ) float>   points weights for the weighted regression case

         Returns
        ---------
            1. <float> average data log likelihood.
        """
        timer = tm.get_point('pois_reg_data_log_like')

        point_ll = objectives.pois_log_prob(
            target, self.get_est_lambda(users, items, user_feat))

        if weights is not None:
            # Weighted regression: scale every point by its weight.
            point_ll *= weights

        timer.collect()

        return np.mean(point_ll)
예제 #5
0
def grad_for_user(users, d_pois_reg_user, d_user_prior):
    """Computes the gradient of the user coefficients, including the user intercept.

     Args
    ------
        1. users:               <(D, ) int>      user ids
        2. d_pois_reg_user:     <(D, f) float>   derivative of user features
        3. d_user_prior:        <(N, f) float>   derivative of the user coefficient prior


     Returns
    ---------
        1. grad:    <(N, f) float>   gradient for each user
    """
    # The work buffers are allocated here and handed to the cython routine; that is simpler than using malloc and
    # free inside the cython code. fm.grad_for_user is expected to fill `grad` in place.
    num_users = d_user_prior.shape[0]
    per_user_counts = np.zeros(num_users)
    grad = np.zeros(d_user_prior.shape)

    timer = tm.get_point('grad_for_user')
    fm.grad_for_user(users, d_pois_reg_user, d_user_prior, per_user_counts,
                     grad)
    timer.collect()

    return grad
예제 #6
0
def grad_for_item(items, d_pois_reg_item, d_item_prior):
    """Computes the gradient of the item intercept.

     Args
    ------
        1. items:               <(D, ) int>     item ids
        2. d_pois_reg_item:     <(D, ) float>   derivative of item intercept (shape assumed from the callers -- TODO confirm)
        3. d_item_prior:        <(M, ...) float> derivative of the item coefficient prior; only its first dimension
                                                 is used here to size the output


     Returns
    ---------
        1. grad:    <(M, ) float>   gradient for each item
    """
    # The work buffers are allocated here and handed to the cython routine; that is simpler than using malloc and
    # free inside the cython code. fm.grad_for_item is expected to fill `grad` in place.
    num_items = d_item_prior.shape[0]
    per_item_counts = np.zeros(num_items)
    grad = np.zeros(num_items)

    timer = tm.get_point('grad_for_item')
    fm.grad_for_item(items, d_pois_reg_item, d_item_prior, per_item_counts,
                     grad)
    timer.collect()

    return grad
예제 #7
0
    def _mle(self, target, users, items, features, weights=None):
        """Computes the average (optionally weighted) Poisson log likelihood of the data.

        The three stages (rate estimation, log-factorial of the targets, numpy likelihood arithmetic) are timed
        under separate time points.

         Args
        ------
            1. target:      target values, one per point
            2. users:       user ids
            3. items:       item ids
            4. features:    feature values passed to get_est_lambda
            5. weights:     optional per-point weights; when given, each point's log likelihood is multiplied by it

         Returns
        ---------
            1. <float> mean of the per-point log likelihood values.
        """
        timer = tm.get_point('beta_mle_est_lambda')
        rates = self.get_est_lambda(users, items, features)
        timer.collect()

        timer = tm.get_point('beta_mle_log_factorial')
        # Presumably log(target!) per point -- helper is defined elsewhere.
        log_fact = helpers.log_factorial(target)
        timer.collect()

        timer = tm.get_point('beta_mle_curr_ll_numpy')
        # Per-point value: y * log(lambda) - lambda - log_factorial(y)
        point_ll = (target * np.log(rates)) - rates - log_fact
        if weights is not None:
            point_ll *= weights

        timer.collect()

        return np.mean(point_ll)
예제 #8
0
    def _beta_derivative_vals(self, users, items, user_feat, target):
        """Computes the per-point derivative values of the Poisson regression and of the priors.

        No gradient is aggregated here. Every row of the feature table (with two leading columns of ones for the
        global and the item intercepts) is derived on its own, and the prior terms are derived from the current
        coefficients. Aggregation into the global / item / user gradients is left to the caller, which is convenient
        because the model has a fixed effect for the population, for each individual and for each item.

         Args
        ------
            1. users:       <(D, ) int>     user ids
            2. items:       <(D, ) int>     item ids
            3. user_feat:   <(D, f) float>  user features values.
            4. target:      <(D, ) int>     target rates

         Returns
        ---------
            1. d_pois_reg:       <(D, f + 2) float>  derivative of ALL features
            2. d_0_prior:        <float>             derivative of the global intercept prior
            3. d_i_prior:        <(M, ) float>       derivative of the item intercept prior
            4. d_u_prior:        <(N, f) float>      derivative of the user beta (including intercept)
        """
        timer = tm.get_point('pois_regression_deriv')

        # Linear term (coefficients times features) for every point; its exponent is needed for the derivative.
        linear_term = gd_commons.mul_feat_coeff(
            users, items, user_feat, self.beta_0, self.beta_u, self.beta_i)

        # Two columns of ones are prepended for the 'non-user' features so everything can be done with matrix
        # operations.
        num_rows = user_feat.shape[0]
        design = np.hstack([np.ones([num_rows, 2]), user_feat])

        # Poisson regression derivative per element: x * y - x * exp(linear term)
        rate_col = np.atleast_2d(np.exp(linear_term)).T
        target_col = np.atleast_2d(target).T
        d_pois_reg = design * target_col - design * rate_col

        # Derivative of the prior: regularizer weight times the distance from the prior mean.
        lamb = self.gd_lamb
        d_0_prior = lamb * (self.beta_0 - self.beta_0_prior)
        d_i_prior = lamb * (self.beta_i - self.beta_i_prior)
        d_u_prior = lamb * (self.beta_u - self.beta_u_prior)

        timer.collect()

        return d_pois_reg, d_0_prior, d_i_prior, d_u_prior
예제 #9
0
def fast_sample(num_points, batch_size):
    """Draws a sample of batch_size indexes out of num_points points.

    The call to sampler.get_sample is timed under a time point whose name includes both sizes.

     Args
    ------
        1. num_points:      <int>   number of points to choose from.
        2. batch_size:      <int>   number of points to sample.

     Returns
    ---------
        1. samp: <(batch_size, ) int>     indexes of selected points
    """
    timer_name = 'fast_sample_%d_%d' % (num_points, batch_size)
    timer = tm.get_point(timer_name)

    chosen = sampler.get_sample(num_points, batch_size)

    timer.collect()

    return chosen
예제 #10
0
    def get_est_lambda(self, users, items, user_feat):
        """Estimates the lambda (rate) parameters from the current beta values.

        For every (user, item) pair in the users and items vectors the linear term is computed from the matching
        feature row by gd_commons.mul_feat_coeff, and the estimated lambda is its exponent.

         Args
        ------
            1. users:       <(D, ) int>      user ids
            2. items:       <(D, ) int>      item ids
            3. user_feat:   <(D, f) float>   user features values

         Returns
        ---------
            1. est_lamb:    <(D, ) ndarray of type float>   estimated lambdas.
        """
        timer = tm.get_point('get_est_lambda')
        linear_term = gd_commons.mul_feat_coeff(
            users, items, user_feat, self.beta_0, self.beta_u, self.beta_i)
        est_lamb = np.exp(linear_term)
        timer.collect()

        return est_lamb
예제 #11
0
    def learn_eta(self, users, items, user_feat, w_ijt):
        """Learns the eta coefficients (eta_0 and eta_u) using stochastic gradient descent with ADAM.

        Each iteration samples a mini-batch, computes the derivative values on it and applies an ADAM update to
        eta_0 and eta_u. Once at least min_gd_iter iterations have run, the likelihood on the full data is evaluated
        every gd_ll_iters iterations; training stops when it changes by at most gd_tol or after it has decreased
        gd_num_dec times, and otherwise after gd_max_iter iterations.

         Args
        ------
            1. users:       <(D, ) int>      user ids
            2. items:       <(D, ) int>      item ids
            3. user_feat:   <(D, f) float>   user features values
            4. w_ijt:       <(D, ) int>      target response values
        """
        self._initialize_eta(user_feat.shape[1])

        # ADAM initial values. The state is created once, before the loop, so that the moment estimates and the step
        # counter accumulate across iterations; re-creating it inside the loop would reset ADAM on every update.
        adam_vals_u = {
            'mean': np.zeros(self.eta_u.shape),
            'var': np.zeros(self.eta_u.shape),
            't': 0
        }
        adam_vals_0 = {'mean': 0, 'var': 0, 't': 0}

        # Number of times the likelihood went down. Used to prevent overfitting and parameter explosion.
        num_down = 0

        prev_ll = curr_ll = -np.inf
        reached_conv = False

        for i in range(1, self.gd_max_iter + 1):
            # Sampling a mini-batch
            samp = gd_commons.fast_sample(user_feat.shape[0],
                                          self.gd_batch_size)

            eta_sgd_point = tm.get_point(
                'eta_sgd_iter')  # Taking this time point after the sample.

            d_features, d_0_prior, d_u_prior = self._eta_derivative_vals(
                users[samp], items[samp], user_feat[samp], w_ijt[samp])

            # Column 0 holds the global intercept derivative, the remaining columns the user coefficients.
            g_grad = gd_commons.grad_for_global(d_features[:, 0], d_0_prior)
            u_grad = gd_commons.grad_for_user(users[samp], d_features[:, 1:],
                                              d_u_prior)

            # These operations are safe because if the user or item were not in the sample the grad for them will be
            # zero.
            self.eta_0 += gd_commons.get_adam_update(self.gd_step_size, g_grad,
                                                     adam_vals_0)
            self.eta_u += gd_commons.get_adam_update(self.gd_step_size, u_grad,
                                                     adam_vals_u)

            eta_sgd_point.collect()

            # Checking for convergence - using only the data likelihood.
            if i >= self.min_gd_iter and i % self.gd_ll_iters == 0:
                curr_ll = self.eta_likelihood(users, items, user_feat, w_ijt)

                if curr_ll < prev_ll:
                    num_down += 1

                log.info(
                    'ZipRegression.learn_eta: Data log like after %d iterations [%.5f --> %.5f]'
                    % (i, prev_ll, curr_ll))

                if np.abs(curr_ll - prev_ll
                          ) <= self.gd_tol or num_down >= self.gd_num_dec:
                    log.info(
                        'ZipRegression.learn_eta: Reached convergance after %d iterations'
                        % i)
                    reached_conv = True
                    break

                prev_ll = curr_ll

        if not reached_conv:
            log.info(
                'ZipRegression.learn_eta: Did not reach convergance after %d iterations'
                % self.gd_max_iter)

        log.info('ZipRegression.learn_eta: Train data log like %.3f' % curr_ll)
예제 #12
0
    def _learn_beta(self, users, items, user_feat, target, weights=None):
        """Learns all the beta coefficients using stochastic gradient descent with ADAM.

        Each iteration samples a mini-batch, computes the per-point derivative values, aggregates them into the
        global, item and user gradients and applies an ADAM update to beta_0, beta_i and beta_u. After min_gd_iter
        iterations the data log likelihood on the full data is evaluated every gd_ll_iters iterations; training stops
        when it changes by at most gd_tol or after it has decreased gd_num_dec times.

         Args
        ------
            1. users:       <(D, ) int>     user ids
            2. items:       <(D, ) int>     item ids
            3. user_feat:   <(D, f) float>  user features values
            4. target:      <(D, ) int>     target rates
            5. weights:     <(D, ) float>   points weights for the weighted regression case

         Raises
        --------
            1. ValueError if the data log likelihood becomes NaN or infinite (coefficients went out of hand).
        """
        self._initialize_beta(user_feat.shape[1])

        # ADAM state (running moments and step counter) for each coefficient group; shared by all iterations.
        adam_state_0 = {'mean': 0, 'var': 0, 't': 0}
        adam_state_i = {
            'mean': np.zeros(self.beta_i.shape),
            'var': np.zeros(self.beta_i.shape),
            't': 0
        }
        adam_state_u = {
            'mean': np.zeros(self.beta_u.shape),
            'var': np.zeros(self.beta_u.shape),
            't': 0
        }

        # Counts how often the likelihood decreased. Used to prevent overfitting and parameter explosion.
        ll_drops = 0
        last_ll = latest_ll = -np.inf
        converged = False

        # Gradient descent main loop
        for it in range(1, self.gd_num_iter + 1):
            # Sampling a mini-batch
            batch = gd_commons.fast_sample(user_feat.shape[0],
                                           self.gd_batch_size)

            # The time point is taken after the sample so sampling is not part of the measurement.
            iter_timer = tm.get_point('pois_reg_sgd_iter')

            # Derivative values only; the gradients are aggregated below.
            d_pois_reg, d_0_prior, d_i_prior, d_u_prior = self._beta_derivative_vals(
                users[batch], items[batch], user_feat[batch], target[batch])

            if weights is not None:
                # Weighted regression: scale each point's derivative by its weight.
                d_pois_reg *= np.atleast_2d(weights[batch]).T

            # Column 0 -> global intercept, column 1 -> item intercept, remaining columns -> user coefficients.
            global_grad = gd_commons.grad_for_global(d_pois_reg[:, 0],
                                                     d_0_prior)
            item_grad = gd_commons.grad_for_item(items[batch],
                                                 d_pois_reg[:, 1], d_i_prior)
            user_grad = gd_commons.grad_for_user(users[batch],
                                                 d_pois_reg[:, 2:], d_u_prior)

            # Updating everything is safe: users or items that were not sampled have a zero gradient.
            step = self.gd_step_size
            self.beta_0 += gd_commons.get_adam_update(step, global_grad,
                                                      adam_state_0)
            self.beta_i += gd_commons.get_adam_update(step, item_grad,
                                                      adam_state_i)
            self.beta_u += gd_commons.get_adam_update(step, user_grad,
                                                      adam_state_u)

            iter_timer.collect()

            # Convergence is only checked on the scheduled iterations, using only the data likelihood.
            if it <= self.min_gd_iter or it % self.gd_ll_iters != 0:
                continue

            latest_ll = self._pois_reg_data_log_like(target, users, items,
                                                     user_feat, weights)

            if latest_ll < last_ll:
                ll_drops += 1

            if not np.isfinite(latest_ll):
                raise ValueError(
                    'Pois_Reg: Coefficient values went out of hand -- adjust regularizer value.'
                )

            log.info('Pois_Reg data log like: [%.3f --> %.3f]' %
                     (last_ll, latest_ll))

            within_tol = np.abs(latest_ll - last_ll) <= self.gd_tol
            if within_tol or ll_drops >= self.gd_num_dec:
                log.info(
                    'Pois_Reg: Reached convergance after %d iterations' % it)
                converged = True
                break

            last_ll = latest_ll

        if not converged:
            log.error(
                'Pois_Reg: Did not reach convergence after %d iterations' %
                self.gd_num_iter)

        log.info('Pois_Reg: Train log like %.3f' % latest_ll)
예제 #13
0
    def _learn_beta(self, users, items, features, target, weights=None):
        """Learns beta_0, beta_i and beta_u with mini-batch gradient steps (plain or ADAM, per self.gd_adam).

        Coefficients that are still None are drawn from a normal(0, 0.1) before training. Each iteration samples a
        mini-batch (weighted sampling when self.gd_weights_sample is set), computes the derivative values, aggregates
        the global / item / user gradients and updates the coefficients. Every gd_ll_iters iterations the likelihood
        on the full data is evaluated through _mle; training stops when it differs from self.prev_ll by at most
        gd_tol. self.prev_ll and self.decay are updated as a side effect.

         Args
        ------
            1. users:       user ids, one per point
            2. items:       item ids, one per point
            3. features:    feature values, one row per point
            4. target:      target values, one per point
            5. weights:     optional per-point weights

         Raises
        --------
            1. ValueError if the evaluated log likelihood becomes NaN or infinite.
        """
        # If any of the parameters wasn't initialized
        if self.beta_u is None:
            self.beta_u = np.random.normal(0, 0.1, [self.N, features.shape[1]])
        if self.beta_i is None:
            self.beta_i = np.random.normal(0, 0.1, self.M)
        if self.beta_0 is None:
            self.beta_0 = np.random.normal(0, 0.1, 1)[0]

        if self.gd_adam:
            # ADAM state, created once so it accumulates across iterations.
            adam_vals_u = {
                'mean': np.zeros(self.beta_u.shape),
                'var': np.zeros(self.beta_u.shape),
                't': 0
            }
            adam_vals_i = {
                'mean': np.zeros(self.beta_i.shape),
                'var': np.zeros(self.beta_i.shape),
                't': 0
            }
            adam_vals_0 = {'mean': 0, 'var': 0, 't': 0}

        # Stays None until the first likelihood evaluation. The loop can finish without ever evaluating it (e.g. when
        # gd_num_iter < gd_ll_iters), and the final log below must not fail on an unbound name in that case.
        curr_ll = None

        reached_conv = False
        for i in range(1, self.gd_num_iter + 1):
            beta_iter_point = tm.get_point('beta_sgd_iter')
            point = tm.get_point('beta_sgd_samp')
            if self.gd_weights_sample:
                samp = gd_commons.fast_sample_with_weights(weights)
            else:
                samp = gd_commons.fast_sample(features.shape[0],
                                              self.gd_batch_size)

            point.collect()

            point = tm.get_point('beta_derivative_vals')
            d_mle, d_g_prior, d_i_prior, d_u_prior = \
                self._beta_derivative_vals(users[samp], items[samp], features[samp], target[samp])

            point.collect()

            # TODO: Discuss the most proper way to combine the weights and the prior/regularization with Padhraic
            if weights is not None and not self.gd_weights_sample:
                # If it's weight sample no need to modify the mle with the weights
                d_mle *= np.atleast_2d(weights[samp]).T

            # Updating the gradient
            # Column 0 -> global intercept, column 1 -> item intercept, remaining columns -> user coefficients.
            g_grad = gd_commons.grad_for_global(d_mle[:, 0], d_g_prior)
            i_grad = gd_commons.grad_for_item(items[samp], d_mle[:, 1],
                                              d_i_prior)
            u_grad = gd_commons.grad_for_user(users[samp], d_mle[:, 2:],
                                              d_u_prior)

            # Step size, divided by the decay counter when decay is enabled.
            a = self.gd_step_size / self.decay if self.gd_decay else self.gd_step_size

            # These operations are safe because if the user or item were not in the sample the grad for them will be
            # zero.
            point = tm.get_point('beta_grad_updates')
            if self.gd_adam:
                self.beta_0 += gd_commons.get_AdaM_update(
                    a, g_grad, adam_vals_0)
                self.beta_i += gd_commons.get_AdaM_update(
                    a, i_grad, adam_vals_i)
                self.beta_u += gd_commons.get_AdaM_update(
                    a, u_grad, adam_vals_u)
            else:
                self.beta_0 += g_grad * a
                self.beta_i += i_grad * a
                self.beta_u += u_grad * a

            point.collect()

            beta_iter_point.collect()
            if i % self.gd_ll_iters == 0:
                point = tm.get_point('beta_mle')
                curr_ll = self._mle(target, users, items, features, weights)
                point.collect()
                if np.isnan(curr_ll) or np.isinf(curr_ll):
                    raise ValueError(
                        'Coefficient values went out of hand -- adjust lambda and/or step size'
                    )
                log.info('BETA GD MLE: [%.3f --> %.3f]' %
                         (self.prev_ll, curr_ll))
                if np.abs(curr_ll - self.prev_ll) <= self.gd_tol:
                    log.info(
                        'BETA GD: Reached convergance after %d iterations' % i)
                    reached_conv = True
                    self.prev_ll = curr_ll
                    break
                else:
                    self.prev_ll = curr_ll

                # The decay counter only advances on evaluation iterations that did not converge.
                self.decay += 1

        if not reached_conv:
            log.error(
                'BETA GD: Did not reach convergance after %d iterations' %
                self.gd_num_iter)

        if curr_ll is not None:
            log.info('BETA GD: Train log like %.3f' % curr_ll)