Example #1
    def load_model(path):
        model_name = 'zip'
        log.info('ZipRegression.load_model: Loading model %s from path %s' %
                 (model_name, path))

        eta_0 = flu.np_load(join(path, '%s_eta_0.npy' % model_name))
        eta_0 = np.atleast_1d(eta_0)[0]

        eta_u = flu.np_load(join(path, '%s_eta_u.npy' % model_name))

        beta_0 = flu.np_load(join(path, '%s_beta_0.npy' % model_name))
        beta_0 = np.atleast_1d(beta_0)[0]

        beta_i = flu.np_load(join(path, '%s_beta_i.npy' % model_name))
        beta_u = flu.np_load(join(path, '%s_beta_u.npy' % model_name))

        n = beta_u.shape[0]
        m = beta_i.shape[0]

        model = ZipRegression(N=n,
                              M=m,
                              eta_0=eta_0,
                              eta_u=eta_u,
                              beta_0=beta_0,
                              beta_i=beta_i,
                              beta_u=beta_u)
        model.trained_users = flu.np_load(
            join(path, '%s_trained_users.npy' % model_name))
        model.trained = True

        return model
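The counterpart save routine is not among these examples. Below is a minimal sketch of what it could look like, assuming a hypothetical `save_model` method that uses the `flu.np_save(path, file_name, data)` wrapper from Example #5 to write exactly the files `load_model` above reads:

    def save_model(self, path):
        # Hypothetical counterpart to load_model: writes the .npy files that
        # load_model expects to find under `path`.
        model_name = 'zip'
        flu.np_save(path, '%s_eta_0.npy' % model_name, np.atleast_1d(self.eta_0))
        flu.np_save(path, '%s_eta_u.npy' % model_name, self.eta_u)
        flu.np_save(path, '%s_beta_0.npy' % model_name, np.atleast_1d(self.beta_0))
        flu.np_save(path, '%s_beta_i.npy' % model_name, self.beta_i)
        flu.np_save(path, '%s_beta_u.npy' % model_name, self.beta_u)
        flu.np_save(path, '%s_trained_users.npy' % model_name, self.trained_users)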
Example #2
    def test_log_prob(self,
                      users,
                      items,
                      data_feat,
                      target,
                      return_vals=False):
        """Evaluates Log-Likelihood accuracy on test data.

         Args
        ------
            1. users:       <(D, ) int>     user ids
            2. items:       <(D, ) int>     item ids
            3. data_feat:   <(D, f) float>  data-driven (non-intercept) features
            4. target:      <(D, ) int>     target rates
            5. return_vals: <bool>          if True returns all the values instead of the mean (default = False)

         Returns
        ---------
            1. <float> average log-likelihood (if return_vals is False)
            2. <(D, ) float> log likelihood for each point (if return_vals is True)
            3. -np.inf if model is not trained.
        """
        if not self.trained:
            return -np.inf

        # At optimization time - it is very likely that some users don't have train data (because they're not active
        # yet). This makes sure that I'm not testing on them.
        test_users = np.unique(users)
        trained_user_mask = np.where(
            np.in1d(users, self.trained_users, assume_unique=False))
        log.info('Trained on %d out of %d test users' %
                 (self.trained_users.shape[0], test_users.shape[0]))

        user_feat = np.hstack([np.ones([data_feat.shape[0], 1]), data_feat])
        lambda_est = self.pos_model.get_est_lambda(
            users[trained_user_mask], items[trained_user_mask],
            user_feat[trained_user_mask])

        pis = self.sigmoid_func(users[trained_user_mask],
                                items[trained_user_mask],
                                user_feat[trained_user_mask])

        vals = self.data_log_like(target[trained_user_mask], lambda_est, pis)

        if return_vals:
            return vals
        else:
            return np.mean(vals)
Example #3
    def load_model(path, num_proc):
        log.info('Loading ZIP model from path %s with %d num proc' %
                 (path, num_proc))

        beta_0 = flu.np_load(join(path, 'pos_beta_0.npy'))
        beta_0 = np.atleast_1d(beta_0)[0]

        beta_i = flu.np_load(join(path, 'pos_beta_i.npy'))
        beta_u = flu.np_load(join(path, 'pos_beta_u.npy'))

        # TODO(MOSHE): Why do I need num_proc here??? And how do I deal with no N and M?
        # TODO(MOSHE): Specifically M because N can be taken from the coefficients
        return PoissonRegression(beta_0=beta_0,
                                 beta_i=beta_i,
                                 beta_u=beta_u,
                                 num_proc=num_proc)
Example #4
    def test_abs_error(self, users, items, data_feat, target):
        if not self.trained:
            return np.inf

        # At optimization time - it is very likely that some users don't have train data (because they're not active
        # yet). This makes sure that I'm not testing on them.
        test_users = np.unique(users)
        trained_user_mask = np.where(
            np.in1d(users, self.trained_users, assume_unique=False))
        log.info('Trained on %d out of %d test users' %
                 (self.trained_users.shape[0], test_users.shape[0]))

        lambda_est = self.predict(users[trained_user_mask],
                                  items[trained_user_mask],
                                  data_feat[trained_user_mask])
        return np.mean(np.abs(lambda_est - target[trained_user_mask]))
Example #5
def np_save(path, file_name, data):
    """
    Wrapper for np.save that also creates the dir if it doesn't exist.

     INPUT:
    -------
        1. path:        <string>    dir path
        2. file_name:   <string>    file name
        3. data:        <ndarray>   numpy array
    """
    log.info('Saving file %s/%s' % (path, file_name))
    make_dir(path)

    start = time.time()
    np.save(join(path, file_name), data)
    os.chmod(join(path, file_name), 0o770)
    log.info('Saving took %d seconds' % (time.time() - start))
Example #6
def np_load(file_path):
    """
    Wrapper for np.load that also logs the load time.

     INPUT:
    -------
        1. file_path:   <string>    file path

     OUTPUT:
    --------
        1. data:    <?>     whatever was saved

     RAISE:
    -------
        1. IOError
    """
    log.info('Loading %s' % file_path)
    start = time.time()
    data = np.load(file_path)
    log.info('Loading took %d seconds' % (time.time() - start))

    return data
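A quick round-trip usage sketch for the two wrappers above; the directory and file name here are purely illustrative:

# Illustrative round trip: save an array with np_save, read it back with np_load.
coeffs = np.random.normal(0, 0.1, [100, 5])
np_save('/tmp/zip_model', 'pos_beta_u.npy', coeffs)
restored = np_load(join('/tmp/zip_model', 'pos_beta_u.npy'))
assert np.allclose(coeffs, restored)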
Example #7
    def _em(self, users, items, data_feat, target):
        """Runs the EM algorithm to learn both \eta and \beta.

         Args
        ------
            1. users:       <(D, ) int>     user ids
            2. items:       <(D, ) int>     item ids
            3. data_feat:   <(D, f) float>  data-driven (non-intercept) features
            4. target:      <(D, ) int>     target rates
        """
        prev_ll = curr_ll = -np.inf
        reached_conv = False

        # Adding the user intercept constant. In my code, the exposure process has different constants than the
        # rate process, so I only pass the data_feat to the methods and deal with the constants separately.
        # The reason I keep the user const in the user_feat is to avoid starting the counts from 1 in the cython code.
        # Either you trust me on that, or you can go and look at it :)
        user_feat = np.hstack([np.ones([data_feat.shape[0], 1]), data_feat])

        # Randomly initializing \eta and \beta
        self._initialize_eta(user_feat.shape[1])
        self.pos_model._initialize_beta(user_feat.shape[1])

        pie = self.sigmoid_func(users, items, user_feat)
        rate = self.pos_model.get_est_lambda(users, items, user_feat)

        # Starting with an ESTEP after randomly initializing eta and beta.
        w_ijt = self._e_step(users, items, user_feat, target, pie, rate)

        # M STEP
        pie, rate = self._m_step(users, items, user_feat, target, w_ijt, rate)

        for em_i in range(self.em_num_iter):
            w_ijt = self._e_step(users, items, user_feat, target, pie, rate)
            pie, rate = self._m_step(users, items, user_feat, target, w_ijt,
                                     rate)

            # ZIP probability
            if em_i > self.min_em_iter and em_i % self.em_ll_iters == 0:
                curr_ll = np.mean(self.data_log_like(target, rate, pie))
                log.info(
                    'ZipRegression._em: Data LL at iteration %d [%.5f --> %.5f]'
                    % (em_i, prev_ll, curr_ll))

                if np.abs(prev_ll - curr_ll) < self.em_tol:
                    log.info('ZipRegression._em: Reached convergence')

                    reached_conv = True
                    break

                prev_ll = curr_ll

        if not reached_conv:
            log.error(
                'ZipRegression._em: Did not reach convergence after %d iterations'
                % self.em_num_iter)

        log.info('ZipRegression._em: Train data log like %.5f' % curr_ll)
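The `data_log_like` helper called in `_em` is not shown in these examples. For a zero-inflated Poisson the per-point data log-likelihood has a standard closed form: log(pi + (1 - pi) * exp(-lambda)) for zero counts, and log(1 - pi) + y * log(lambda) - lambda - log(y!) otherwise. A sketch under that assumption follows, taking `pie` to be the probability of a structural zero (if the code's convention is the complementary exposure probability, the roles of `pie` and `1 - pie` flip):

    def data_log_like(self, target, rate, pie):
        # Sketch of the zero-inflated Poisson per-point log-likelihood:
        #   y == 0:  log(pi + (1 - pi) * exp(-lambda))
        #   y  > 0:  log(1 - pi) + y * log(lambda) - lambda - log(y!)
        from scipy.special import gammaln

        target = np.asarray(target, dtype=float)
        zeros = target == 0
        ll = np.empty_like(target)
        ll[zeros] = np.log(pie[zeros] + (1.0 - pie[zeros]) * np.exp(-rate[zeros]))
        ll[~zeros] = (np.log(1.0 - pie[~zeros]) + target[~zeros] * np.log(rate[~zeros])
                      - rate[~zeros] - gammaln(target[~zeros] + 1))
        return ll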
Example #8
    def test_f1(self, users, items, data_feat, target, return_vals=False):
        """Evaluates F1 accuracy on test data.

         Args
        ------
            1. users:       <(D, ) int>     user ids
            2. items:       <(D, ) int>     item ids
            3. data_feat:   <(D, f) float>  data-driven (non-intercept) features
            4. target:      <(D, ) int>     target rates
            5. return_vals: <bool>          if True returns all the values instead of the mean (default = False)

         Returns
        ---------
            1. <float> average F1 (if return_vals is False)
            2. <(D, ) float> F1 for each point (if return_vals is True)
            3. np.inf if model is not trained.
        """
        if not self.trained:
            return np.inf

        # At optimization time - it is very likely that some users don't have train data (because they're not active
        # yet). This makes sure that I'm not testing on them.
        test_users = np.unique(users)
        trained_user_mask = np.where(
            np.in1d(users, self.trained_users, assume_unique=False))
        log.info('Trained on %d out of %d test users' %
                 (self.trained_users.shape[0], test_users.shape[0]))

        zip_exp = self.predict(users[trained_user_mask],
                               items[trained_user_mask],
                               data_feat[trained_user_mask])
        vals = objectives.f_measure(target[trained_user_mask], zip_exp)

        if return_vals:
            return vals
        else:
            return np.mean(vals)
Example #9
    def start_sampling(self, num_points, batch_size):
        """Creates a sampling process for the (num_points, batch_size) pair.

         Args
        ------
            1. num_points:  <int>   number of elements to choose from
            2. batch_size:  <int>   number of choices
        """
        log.info('AsyncSampler.start_sampling: Starting a sampler for [%d %d]' % (num_points, batch_size))
        pair = (num_points, batch_size)
        
        q = Queue(self.q_size)
        proc_pool = []

        # We save pointers to the queue and the process pool so we can free them in the "destructor"
        self.samplers[pair] = q
        self.proc_pools[pair] = proc_pool

        # Creating processes that will do the sampling
        for i in range(self.num_proc):
            proc = Process(target=self._async_sampler, args=(q, num_points, batch_size))
            atexit.register(proc.terminate)
            proc_pool.append(proc)
            proc.start()
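The `_async_sampler` target each worker process runs is not included in these examples; a plausible sketch, under the assumption that a worker simply keeps pushing random index batches onto the shared queue until it is terminated by the `atexit` hook:

    def _async_sampler(self, q, num_points, batch_size):
        # Hypothetical worker body: draw a random mini-batch of indices and
        # block on q.put() whenever the queue is full.
        while True:
            samp = np.random.choice(num_points, size=batch_size)
            q.put(samp)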
Example #10
    def learn_eta(self, users, items, user_feat, w_ijt):
        """Performs the e-step of the EM algorithm to estimate the response values w_ijt.

         Args
        ------
            1. users:       <(D, ) int>      user ids
            2. items:       <(D, ) int>      item ids
            3. user_feat:   <(D, f) float>   user features values
            4. w_ijt:       <(D, ) int>      target response values
        """
        self._initialize_eta(user_feat.shape[1])

        # Number of times the likelihood went down. Used to prevent overfitting and parameter explosion.
        num_down = 0

        prev_ll = curr_ll = -np.inf
        reached_conv = False

        # ADAM initial values. Initialized once, before the SGD loop, so the
        # moment estimates accumulate across iterations.
        adam_vals_u = {
            'mean': np.zeros(self.eta_u.shape),
            'var': np.zeros(self.eta_u.shape),
            't': 0
        }
        adam_vals_0 = {'mean': 0, 'var': 0, 't': 0}

        for i in range(1, self.gd_max_iter + 1):
            # Sampling a mini-batch
            samp = gd_commons.fast_sample(user_feat.shape[0],
                                          self.gd_batch_size)

            eta_sgd_point = tm.get_point(
                'eta_sgd_iter')  # Taking this time point after the sample.

            d_features, d_0_prior, d_u_prior = self._eta_derivative_vals(
                users[samp], items[samp], user_feat[samp], w_ijt[samp])

            g_grad = gd_commons.grad_for_global(d_features[:, 0], d_0_prior)
            u_grad = gd_commons.grad_for_user(users[samp], d_features[:, 1:],
                                              d_u_prior)

            # These operations are safe because if the user or item were not in the sample the grad for them will be
            # zero.
            self.eta_0 += gd_commons.get_adam_update(self.gd_step_size, g_grad,
                                                     adam_vals_0)
            self.eta_u += gd_commons.get_adam_update(self.gd_step_size, u_grad,
                                                     adam_vals_u)

            eta_sgd_point.collect()

            # Checking for convergence - using only the data likelihood.
            if i >= self.min_gd_iter and i % self.gd_ll_iters == 0:
                curr_ll = self.eta_likelihood(users, items, user_feat, w_ijt)

                if curr_ll < prev_ll:
                    num_down += 1

                log.info(
                    'ZipRegression.learn_eta: Data log like after %d iterations [%.5f --> %.5f]'
                    % (i, prev_ll, curr_ll))

                if np.abs(curr_ll - prev_ll
                          ) <= self.gd_tol or num_down >= self.gd_num_dec:
                    log.info(
                        'ZipRegression.learn_eta: Reached convergence after %d iterations'
                        % i)
                    reached_conv = True
                    break

                prev_ll = curr_ll

        if not reached_conv:
            log.info(
                'ZipRegression.learn_eta: Did not reach convergence after %d iterations'
                % self.gd_max_iter)

        log.info('ZipRegression.learn_eta: Train data log like %.3f' % curr_ll)
Example #11
    def _learn_beta(self, users, items, user_feat, target, weights=None):
        """Learns all the \beta's using stochastic gradient descent with ADAM.

         Args
        ------
            1. users:       <(D, ) int>     user ids
            2. items:       <(D, ) int>     item ids
            3. user_feat:   <(D, f) float>  user features values
            4. target:      <(D, ) int>     target rates
            5. weights:     <(D, ) float>   points weights for the weighted regression case

         Raises
        --------
            1. ValueError if the coefficient values blow up to np.inf.
        """
        self._initialize_beta(user_feat.shape[1])

        # ADAM initial values
        adam_vals_u = {
            'mean': np.zeros(self.beta_u.shape),
            'var': np.zeros(self.beta_u.shape),
            't': 0
        }
        adam_vals_i = {
            'mean': np.zeros(self.beta_i.shape),
            'var': np.zeros(self.beta_i.shape),
            't': 0
        }
        adam_vals_0 = {'mean': 0, 'var': 0, 't': 0}

        # Number of times the likelihood went down. Used to prevent overfitting and parameter explosion.
        num_down = 0

        prev_ll = curr_ll = -np.inf
        reached_conv = False

        # Gradient descent main loop
        for i in range(1, self.gd_num_iter + 1):
            # Sampling a mini-batch
            samp = gd_commons.fast_sample(user_feat.shape[0],
                                          self.gd_batch_size)

            # Taking this time point after the sample.
            point = tm.get_point('pois_reg_sgd_iter')

            # First computing all the derivative values. Not computing the gradients yet.
            d_pois_reg, d_0_prior, d_i_prior, d_u_prior = \
                self._beta_derivative_vals(users[samp], items[samp], user_feat[samp], target[samp])

            if weights is not None:
                # It's weighted regression and I need to modify the weight of each point.
                d_pois_reg *= np.atleast_2d(weights[samp]).T

            # Computing all the gradients
            g_grad = gd_commons.grad_for_global(d_pois_reg[:, 0], d_0_prior)
            i_grad = gd_commons.grad_for_item(items[samp], d_pois_reg[:, 1],
                                              d_i_prior)
            u_grad = gd_commons.grad_for_user(users[samp], d_pois_reg[:, 2:],
                                              d_u_prior)

            # These operations are safe because if the user or item were not in the sample the grad for them will be
            # zero.

            self.beta_0 += gd_commons.get_adam_update(self.gd_step_size,
                                                      g_grad, adam_vals_0)
            self.beta_i += gd_commons.get_adam_update(self.gd_step_size,
                                                      i_grad, adam_vals_i)
            self.beta_u += gd_commons.get_adam_update(self.gd_step_size,
                                                      u_grad, adam_vals_u)

            point.collect()

            # Checking for convergence - using only the data likelihood.
            if i > self.min_gd_iter and i % self.gd_ll_iters == 0:
                curr_ll = self._pois_reg_data_log_like(target, users, items,
                                                       user_feat, weights)

                if curr_ll < prev_ll:
                    num_down += 1

                if np.isnan(curr_ll) or np.isinf(curr_ll):
                    raise ValueError(
                        'Pois_Reg: Coefficient values went out of hand -- adjust regularizer value.'
                    )

                log.info('Pois_Reg data log like: [%.3f --> %.3f]' %
                         (prev_ll, curr_ll))

                if np.abs(curr_ll - prev_ll
                          ) <= self.gd_tol or num_down >= self.gd_num_dec:
                    log.info(
                        'Pois_Reg: Reached convergence after %d iterations' %
                        i)

                    reached_conv = True
                    break

                prev_ll = curr_ll

        if not reached_conv:
            log.error(
                'Pois_Reg: Did not reach convergence after %d iterations' %
                self.gd_num_iter)

        log.info('Pois_Reg: Train log like %.3f' % curr_ll)
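`gd_commons.get_adam_update` itself is not shown in these examples. Its call sites (a step size, a gradient, and a mutable dict holding 'mean', 'var' and 't') suggest a standard ADAM update that is added directly to the coefficients; a sketch under that assumption:

def get_adam_update(step_size, grad, adam_vals, b1=0.9, b2=0.999, eps=1e-8):
    # Sketch of an ADAM-style update consistent with the call sites above:
    # mutate the running first/second moment estimates in adam_vals and return
    # the bias-corrected step to be added to the coefficient being learned.
    adam_vals['t'] += 1
    adam_vals['mean'] = b1 * adam_vals['mean'] + (1.0 - b1) * grad
    adam_vals['var'] = b2 * adam_vals['var'] + (1.0 - b2) * grad ** 2

    mean_hat = adam_vals['mean'] / (1.0 - b1 ** adam_vals['t'])
    var_hat = adam_vals['var'] / (1.0 - b2 ** adam_vals['t'])
    return step_size * mean_hat / (np.sqrt(var_hat) + eps)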
Example #12
    def _learn_beta(self, users, items, features, target, weights=None):
        # Initialize any of the parameters that were not set yet
        if self.beta_u is None:
            self.beta_u = np.random.normal(0, 0.1, [self.N, features.shape[1]])
        if self.beta_i is None:
            self.beta_i = np.random.normal(0, 0.1, self.M)
        if self.beta_0 is None:
            self.beta_0 = np.random.normal(0, 0.1, 1)[0]

        if self.gd_adam:
            adam_vals_u = {
                'mean': np.zeros(self.beta_u.shape),
                'var': np.zeros(self.beta_u.shape),
                't': 0
            }
            adam_vals_i = {
                'mean': np.zeros(self.beta_i.shape),
                'var': np.zeros(self.beta_i.shape),
                't': 0
            }
            adam_vals_0 = {'mean': 0, 'var': 0, 't': 0}

        # Computing the lambda array

        reached_conv = False
        for i in range(1, self.gd_num_iter + 1):
            beta_iter_point = tm.get_point('beta_sgd_iter')
            point = tm.get_point('beta_sgd_samp')
            if self.gd_weights_sample:
                samp = gd_commons.fast_sample_with_weights(weights)
            else:
                samp = gd_commons.fast_sample(features.shape[0],
                                              self.gd_batch_size)

            point.collect()

            point = tm.get_point('beta_derivative_vals')
            d_mle, d_g_prior, d_i_prior, d_u_prior = \
                self._beta_derivative_vals(users[samp], items[samp], features[samp], target[samp])

            point.collect()

            # TODO: Discuss the most proper way to combine the weights and the prior/regularization with Padhraic
            if weights is not None and not self.gd_weights_sample:
                # If it's weight sample no need to modify the mle with the weights
                d_mle *= np.atleast_2d(weights[samp]).T

            # Updating the gradient
            g_grad = gd_commons.grad_for_global(d_mle[:, 0], d_g_prior)
            i_grad = gd_commons.grad_for_item(items[samp], d_mle[:, 1],
                                              d_i_prior)
            u_grad = gd_commons.grad_for_user(users[samp], d_mle[:, 2:],
                                              d_u_prior)

            a = self.gd_step_size / self.decay if self.gd_decay else self.gd_step_size

            # These operations are safe because if the user or item were not in the sample the grad for them will be
            # zero.
            point = tm.get_point('beta_grad_updates')
            if self.gd_adam:
                self.beta_0 += gd_commons.get_AdaM_update(
                    a, g_grad, adam_vals_0)
                self.beta_i += gd_commons.get_AdaM_update(
                    a, i_grad, adam_vals_i)
                self.beta_u += gd_commons.get_AdaM_update(
                    a, u_grad, adam_vals_u)
            else:
                self.beta_0 += g_grad * a
                self.beta_i += i_grad * a
                self.beta_u += u_grad * a

            point.collect()

            beta_iter_point.collect()
            if i % self.gd_ll_iters == 0:
                point = tm.get_point('beta_mle')
                curr_ll = self._mle(target, users, items, features, weights)
                point.collect()
                if np.isnan(curr_ll) or np.isinf(curr_ll):
                    raise ValueError(
                        'Coefficient values went out of hand -- adjust lambda and/or step size'
                    )
                log.info('BETA GD MLE: [%.3f --> %.3f]' %
                         (self.prev_ll, curr_ll))
                if np.abs(curr_ll - self.prev_ll) <= self.gd_tol:
                    log.info(
                        'BETA GD: Reached convergence after %d iterations' % i)
                    reached_conv = True
                    self.prev_ll = curr_ll
                    break
                else:
                    self.prev_ll = curr_ll

                self.decay += 1

        if not reached_conv:
            log.error(
                'BETA GD: Did not reach convergence after %d iterations' %
                self.gd_num_iter)

        log.info('BETA GD: Train log like %.3f' % curr_ll)
Example #13
def log_summary():
    tm_df = get_summary()
    log.info('\n\n*****  TIME MEASUREMENTS  *****\n\n%s\n\n' % tm_df)
    reset_tm()