Example no. 1
    def predict_items(self, users=None, top=None):
        """Perform predictions on samples in 'users' for all items.

        Args:
            users: array, optional
                Array with the indices of the users for which to make
                predictions. If None (default), predicts for all users.

            top: int, optional
                If given, return only the first 'top' item columns of the
                predictions (the k first items, not the k best-rated ones).

        Returns:
            C: ndarray, shape = (n_samples, n_items)
                Returns predicted values.

        """

        if users is None:
            users = np.arange(len(self.bias['dUsers']))

        predictions = []
        trusters_cache = defaultdict(list)
        feedback_cached = defaultdict(list)
        isFeedbackADict = isinstance(self.feedback, dict)

        for u in users:

            trustees_u = cache_rows(self.trust, u, trusters_cache)

            if isFeedbackADict:
                feedback_u = self.feedback[u]
            else:
                feedback_u = cache_rows(self.feedback, u, feedback_cached)

            pred = _predict_all_items(u, self.bias['globalAvg'],
                                      self.bias['dUsers'], self.bias['dItems'],
                                      self.P, self.Q, self.Y, self.W,
                                      feedback_u, trustees_u)
            predictions.append(pred)

        predictions = np.asarray(predictions)

        # Keep only the first 'top' item columns (not the top-scored items)
        if top is not None:
            predictions = predictions[:, :top]

        return super().predict_on_range(predictions)
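A note on the 'top' argument: it slices the first columns rather than ranking them. A tiny runnable illustration of the difference, using plain NumPy and hypothetical scores:

import numpy as np

# Fake prediction matrix for 2 users x 5 items.
predictions = np.array([[3.0, 5.0, 1.0, 4.0, 2.0],
                        [2.0, 1.0, 5.0, 3.0, 4.0]])

top = 3
first_k = predictions[:, :top]  # what predict_items returns: the first 3 columns
best_k = np.argsort(-predictions, axis=1)[:, :top]  # indices of the 3 best items

print(first_k)  # [[3. 5. 1.] [2. 1. 5.]]
print(best_k)   # [[1 3 0] [2 4 3]]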
Example no. 3
    def predict(self, X):
        """Perform predictions on samples in X.

        This function receives an array of indices and returns the prediction
        for each one.

        Args:
            X: ndarray
                Samples. Matrix that contains user-item pairs.

        Returns:
            C: array, shape = (n_samples,)
                Returns predicted values.

        """

        # Prepare data (map non-existing indices to valid ones for CV)
        X = super().prepare_predict(X)

        users = X[:, self.order[0]]
        items = X[:, self.order[1]]

        predictions = np.zeros(len(X))

        trusters_cache = defaultdict(list)
        feedback_cached = defaultdict(list)
        isFeedbackADict = isinstance(self.feedback, dict)

        for i in range(len(users)):
            u = users[i]

            trustees_u = cache_rows(self.trust, u, trusters_cache)
            # No need to cast for CV because of "max(num_users, shape_t[0])"

            if isFeedbackADict:
                feedback_u = self.feedback[u]
            else:
                feedback_u = cache_rows(self.feedback, u, feedback_cached)
                feedback_u = feedback_u[feedback_u < self.shape[1]]  # For CV

            predictions[i] = _predict(u, items[i], self.bias['globalAvg'],
                                      self.bias['dUsers'], self.bias['dItems'],
                                      self.P, self.Q, self.Y, self.W,
                                      feedback_u, trustees_u)[0]

        # Set predictions for non-existing indices (CV)
        predictions = self.fix_predictions(X, predictions, self.bias)
        return super().predict_on_range(np.asarray(predictions))
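A small runnable sketch of the expected input layout; 'order' stands in for self.order, which the indexing above implies holds the column positions of users and items:

import numpy as np

# X holds one (user, item) pair per row; 'order' gives each column's position.
X = np.array([[0, 7],
              [3, 2],
              [1, 9]])
order = (0, 1)  # users in column 0, items in column 1

users = X[:, order[0]]  # array([0, 3, 1])
items = X[:, order[1]]  # array([7, 2, 9])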
Example no. 4
def compute_loss(data, low_rank_matrices, params):

    # Set parameters
    ratings = data
    U, V = low_rank_matrices
    lmbda = params

    # Check data type
    if isinstance(ratings, __sparse_format__):
        pass
    elif isinstance(ratings, Table):
        # Preprocess Orange.data.Table and transform it to sparse
        ratings, order, shape = preprocess(ratings)
        ratings = table2sparse(ratings, shape, order, m_type=__sparse_format__)
    else:
        raise TypeError('Invalid data type')

    # Cache rows
    users_cached = defaultdict(list)

    F = -0.5 * lmbda * (np.sum(U * U) + np.sum(V * V))  # L2 regularization

    for i in range(len(U)):
        # Precompute f (f[j] = <U[i], V[j]>)
        items = cache_rows(ratings, i, users_cached)
        f = np.einsum('j,ij->i', U[i], V[items])

        for j in range(len(items)):  # iterate over the user's relevant items
            F += np.log(_g(f[j]))
            F += np.log(1 - _g(f - f[j])).sum(axis=0)  # sum over competing items
    return F
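The helpers _g and _dg are not shown in these examples. A plausible self-contained definition, assuming _g is the logistic sigmoid used by CLiMF and _dg its derivative:

import numpy as np

def _g(x):
    # Logistic sigmoid: g(x) = 1 / (1 + exp(-x))
    return 1.0 / (1.0 + np.exp(-x))

def _dg(x):
    # Sigmoid derivative: g'(x) = g(x) * (1 - g(x))
    gx = _g(x)
    return gx * (1.0 - gx)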
Example no. 6
def compute_loss(data, bias, low_rank_matrices, params):

    # Set parameters
    ratings, feedback = data
    global_avg, bu, bi = bias
    P, Q, Y = low_rank_matrices
    lmbda, bias_lmbda = params

    # Check ratings data type
    if isinstance(ratings, __sparse_format__):
        pass
    elif isinstance(ratings, Table):
        # Preprocess Orange.data.Table and transform it to sparse
        ratings, order, shape = preprocess(ratings)
        ratings = table2sparse(ratings, shape, order, m_type=__sparse_format__)
    else:
        raise TypeError('Invalid data type')

    # Check feedback data type
    if isinstance(feedback, (dict, __sparse_format__)):
        pass
    elif isinstance(feedback, Table):
        # Preprocess Orange.data.Table and transform it to sparse
        feedback, order, shape = preprocess(feedback)
        feedback = table2sparse(feedback,
                                shape,
                                order,
                                m_type=__sparse_format__)
    else:
        raise TypeError('Invalid data type')

    # Set caches
    feedback_cached = defaultdict(list)
    isFeedbackADict = isinstance(feedback, dict)

    # Compute loss
    objective = 0
    for u, j in zip(*ratings.nonzero()):

        # Get feedback from the cache
        if isFeedbackADict:
            feedback_u = feedback[u]
        else:
            feedback_u = cache_rows(feedback, u, feedback_cached)

        # Prediction
        ruj_pred = _predict(u, j, global_avg, bu, bi, P, Q, Y, feedback_u)[0]
        objective += (ratings[u, j] - ruj_pred)**2  # error^2

        # Regularization
        temp_y = np.sum(Y[feedback_u, :], axis=0)
        objective += lmbda * (np.linalg.norm(P[u, :]) ** 2 +
                              np.linalg.norm(Q[j, :]) ** 2 +
                              np.linalg.norm(temp_y) ** 2) + \
                     bias_lmbda * (bu[u] ** 2 + bi[j] ** 2)

    return objective
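_predict is referenced but not defined here. A minimal sketch of the standard SVD++ prediction rule it appears to implement; the three-value return shape is inferred from the call sites, so treat this as an assumption rather than the verbatim helper:

import numpy as np

def _predict(u, j, global_avg, bu, bi, P, Q, Y, feedback_u):
    # SVD++: r_uj = mu + b_u + b_j + Q_j . (P_u + |I_u|^-1/2 * sum(Y_i))
    norm_feedback = np.sqrt(len(feedback_u))
    if norm_feedback > 0:
        y_term = Y[feedback_u, :].sum(axis=0) / norm_feedback
    else:
        y_term = np.zeros(Q.shape[1])
    ruj_pred = global_avg + bu[u] + bi[j] + np.dot(Q[j, :], P[u, :] + y_term)
    return ruj_pred, y_term, norm_feedback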
Example no. 10
def _matrix_factorization(ratings,
                          feedback,
                          bias,
                          shape,
                          num_factors,
                          num_iter,
                          learning_rate,
                          bias_learning_rate,
                          lmbda,
                          bias_lmbda,
                          optimizer,
                          verbose=False,
                          random_state=None,
                          callback=None):

    # Seed the generator
    if random_state is not None:
        np.random.seed(random_state)

    # Get featured matrices dimensions
    num_users, num_items = shape

    # Initialize low-rank matrices
    P = np.random.rand(num_users, num_factors)  # User-feature matrix
    Q = np.random.rand(num_items, num_factors)  # Item-feature matrix
    Y = np.random.randn(num_items, num_factors)  # Feedback-feature matrix

    # Unpack precomputed biases (not needed if they are learnt)
    global_avg = bias['globalAvg']
    bu = bias['dUsers']
    bi = bias['dItems']

    # Configure optimizer
    update_bu = create_opt(optimizer, bias_learning_rate).update
    update_bj = create_opt(optimizer, bias_learning_rate).update
    update_pu = create_opt(optimizer, learning_rate).update
    update_qj = create_opt(optimizer, learning_rate).update
    update_yi = create_opt(optimizer, learning_rate).update

    # Cache rows
    users_cached = defaultdict(list)
    feedback_cached = defaultdict(list)

    # Print information about the verbosity level
    if verbose:
        print('SVD++ factorization started.')
        print('\tLevel of verbosity: ' + str(int(verbose)))
        print('\t\t- Verbosity = 1\t->\t[time/iter]')
        print('\t\t- Verbosity = 2\t->\t[time/iter, loss]')
        print('')

    # Catch warnings
    with warnings.catch_warnings():

        # Turn matching warnings into exceptions
        warnings.filterwarnings('error')
        try:

            # Factorize matrix using SGD
            for step in range(num_iter):
                if verbose:
                    start = time.time()
                    print('- Step: %d' % (step + 1))

                # Send information about the process
                if callback:
                    callback(step + 1)

                # Optimize rating prediction
                for u, j in zip(*ratings.nonzero()):

                    # If there is no feedback, infer it from the ratings
                    if feedback is None:
                        feedback_u = cache_rows(ratings, u, users_cached)
                    else:
                        feedback_u = cache_rows(feedback, u, feedback_cached)
                        feedback_u = feedback_u[feedback_u < num_items]  # For CV

                    # Prediction and error
                    ruj_pred, y_term, norm_feedback = \
                        _predict(u, j, global_avg, bu, bi, P, Q, Y, feedback_u)
                    eij = ratings[u, j] - ruj_pred

                    # Compute gradients
                    dx_bu = -eij + bias_lmbda * bu[u]
                    dx_bi = -eij + bias_lmbda * bi[j]
                    dx_pu = -eij * Q[j, :] + lmbda * P[u, :]
                    dx_qi = -eij * (P[u, :] + y_term) + lmbda * Q[j, :]

                    # Update the gradients at the same time
                    update_bu(dx_bu, bu, u)
                    update_bj(dx_bi, bi, j)
                    update_pu(dx_pu, P, u)
                    update_qj(dx_qi, Q, j)

                    if norm_feedback > 0:  # Gradient Y
                        dx_yi = -eij/norm_feedback * Q[j, :] \
                                + lmbda * Y[feedback_u, :]
                        update_yi(dx_yi, Y, feedback_u)

                # Print process
                if verbose:
                    print('\t- Time: %.3fs' % (time.time() - start))

                    if verbose > 1:
                        # Set parameters and compute loss
                        loss_feedback = (feedback if feedback is not None
                                         else users_cached)
                        data_t = (ratings, loss_feedback)
                        bias_t = (global_avg, bu, bi)
                        low_rank_matrices = (P, Q, Y)
                        params = (lmbda, bias_lmbda)
                        objective = compute_loss(data_t, bias_t,
                                                 low_rank_matrices, params)

                        print('\t- Training loss: %.3f' % objective)
                    print('')

            if feedback is None:
                feedback = users_cached

        except RuntimeWarning:
            if callback:
                callback(num_iter)
            raise RuntimeError('Training diverged and returned NaN.')

    return P, Q, Y, bu, bi, feedback
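cache_rows is used throughout but never defined in these examples. A minimal sketch consistent with the call sites, assuming a row-sliceable SciPy sparse matrix and a defaultdict(list) cache:

from collections import defaultdict

import numpy as np
from scipy.sparse import csr_matrix

def cache_rows(matrix, u, cache):
    # Return the nonzero column indices of row u, memoized in 'cache'.
    if len(cache[u]) == 0:
        cache[u] = np.asarray(matrix[u, :].nonzero()[1])
    return cache[u]

# Tiny usage check
m = csr_matrix(np.array([[0, 3, 0, 5],
                         [1, 0, 0, 0]]))
c = defaultdict(list)
print(cache_rows(m, 0, c))  # [1 3]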
Example no. 11
def _matrix_factorization(ratings, shape, num_factors, num_iter, learning_rate,
                          lmbda, optimizer, verbose=False, random_state=None,
                          callback=None):
    # Seed the generator
    if random_state is not None:
        np.random.seed(random_state)

    # Get featured matrices dimensions
    num_users, num_items = shape

    # Initialize low-rank matrices
    U = 0.01 * np.random.rand(num_users, num_factors)  # User-feature matrix
    V = 0.01 * np.random.rand(num_items, num_factors)  # Item-feature matrix

    # Configure optimizer
    update_ui = create_opt(optimizer, learning_rate).update
    update_vw = create_opt(optimizer, learning_rate).update

    # Cache rows
    users_cached = defaultdict(list)

    # Print information about the verbosity level
    if verbose:
        print('CLiMF factorization started.')
        print('\tLevel of verbosity: ' + str(int(verbose)))
        print('\t\t- Verbosity = 1\t->\t[time/iter]')
        print('\t\t- Verbosity = 2\t->\t[time/iter, loss]')
        print('\t\t- Verbosity = 3\t->\t[time/iter, loss, MRR]')
        print('')

        # Prepare sample of users
        if verbose > 2:
            queries = None
            num_samples = min(num_users, 1000)  # max. number to sample
            users_sampled = np.random.choice(np.arange(num_users), num_samples)

    # Catch warnings
    with warnings.catch_warnings():

        # Turn matching warnings into exceptions
        warnings.filterwarnings('error')
        try:

            # Factorize matrix using SGD
            for step in range(num_iter):
                if verbose:
                    start = time.time()
                    print('- Step: %d' % (step + 1))

                # Send information about the process
                if callback:
                    callback(step + 1)

                # Optimize rating prediction
                for i in range(len(U)):
                    dU = -lmbda * U[i]

                    # Precompute f (f[j] = <U[i], V[j]>)
                    items = cache_rows(ratings, i, users_cached)
                    f = np.einsum('j,ij->i', U[i], V[items])

                    for j in range(len(items)):  # j=items
                        w = items[j]

                        # Gradient of the ln g(f_j) term plus regularization
                        dV = _g(-f[j]) * U[i] - lmbda * V[w]

                        # For I
                        vec1 = _dg(f[j] - f) * \
                               (1 / (1 - _g(f - f[j])) - 1 / (1 - _g(f[j] - f)))
                        dV += np.einsum('i,j->ij', vec1, U[i]).sum(axis=0)

                        update_vw(-dV, V, w)

                        dU += _g(-f[j]) * V[w]

                        # For II
                        vec2 = V[w] - V[items]
                        vec3 = _dg(f - f[j]) / (1 - _g(f - f[j]))
                        dU += np.einsum('ij,i->ij', vec2, vec3).sum(axis=0)

                    update_ui(-dU, U, i)

                # Print process
                if verbose:
                    print('\t- Time: %.3fs' % (time.time() - start))

                    if verbose > 1:
                        # Set parameters and compute loss
                        low_rank_matrices = (U, V)
                        params = lmbda
                        objective = compute_loss(ratings, low_rank_matrices, params)
                        print('\t- Training loss: %.3f' % objective)

                        if verbose > 2:
                            model = CLiMFModel(U=U, V=V)
                            mrr, queries = \
                                model.compute_mrr(ratings, users_sampled, queries)
                            print('\t- Train MRR: %.4f' % mrr)
                    print('')

        except RuntimeWarning:
            if callback:
                callback(num_iter)
            raise RuntimeError('Training diverged and returned NaN.')

    return U, V
Example no. 12
def _matrix_factorization(ratings,
                          trust,
                          bias,
                          shape,
                          shape_t,
                          num_factors,
                          num_iter,
                          learning_rate,
                          bias_learning_rate,
                          lmbda,
                          bias_lmbda,
                          social_lmbda,
                          optimizer,
                          verbose=False,
                          random_state=None,
                          callback=None):

    # Seed the generator
    if random_state is not None:
        np.random.seed(random_state)

    # Get featured matrices dimensions
    num_users, num_items = shape
    num_users = max(num_users, max(shape_t))

    # Initialize low-rank matrices
    P = np.random.rand(num_users, num_factors)  # User-feature matrix
    Q = np.random.rand(num_items, num_factors)  # Item-feature matrix
    Y = np.random.randn(num_items, num_factors)  # Feedback-feature matrix
    W = np.random.randn(num_users, num_factors)  # Trust-feature matrix

    # Unpack precomputed biases (not needed if they are learnt)
    global_avg = bias['globalAvg']
    bu = bias['dUsers']
    bi = bias['dItems']

    # Configure optimizer
    update_bu = create_opt(optimizer, bias_learning_rate).update
    update_bj = create_opt(optimizer, bias_learning_rate).update
    update_pu = create_opt(optimizer, learning_rate).update
    update_qj = create_opt(optimizer, learning_rate).update
    update_yi = create_opt(optimizer, learning_rate).update
    update_wv = create_opt(optimizer, learning_rate).update

    # Cache rows
    # >>> From 2 days to 30s
    users_cache = defaultdict(list)
    trusters_cache = defaultdict(list)

    # Cache norms (slower than list, but allows vectorization)
    # >>>  Lists: 6s; Arrays: 12s -> vectorized: 2s
    norm_I = np.zeros(num_users)  # norms of Iu
    norm_U = np.zeros(num_items)  # norms of Uj
    norm_Tr = np.zeros(num_users)  # norms of Tu
    norm_Tc = np.zeros(num_users)  # norms of Tv

    # Precompute transpose (most costly operation)
    ratings_T = ratings.T
    trust_T = trust.T

    # Print information about the verbosity level
    if verbose:
        print('TrustSVD factorization started.')
        print('\tLevel of verbosity: ' + str(int(verbose)))
        print('\t\t- Verbosity = 1\t->\t[time/iter]')
        print('\t\t- Verbosity = 2\t->\t[time/iter, loss]')
        print('')

    # Catch warnings
    with warnings.catch_warnings():

        # Turn matching warnings into exceptions
        warnings.filterwarnings('error')
        try:

            # Factorize matrix using SGD
            for step in range(num_iter):
                if verbose:
                    start = time.time()
                    print('- Step: %d' % (step + 1))

                # Send information about the process
                if callback:
                    callback(step + 1)

                # Optimize rating prediction
                for u, j in zip(*ratings.nonzero()):

                    # Store lists in cache
                    items_u = cache_rows(ratings, u, users_cache)
                    trustees_u = cache_rows(trust, u, trusters_cache)
                    # No need to cast for CV due to max(num_users, shape_t[0])

                    # Prediction and error
                    ruj_pred, y_term, w_term, norm_Iu, norm_Tu = \
                        _predict(u, j, global_avg, bu, bi, P, Q, Y, W, items_u,
                                 trustees_u)
                    euj = ruj_pred - ratings[u, j]

                    # Store/Compute norms
                    norm_I[u] = norm_Iu
                    norm_Tr[u] = norm_Tu
                    norm_Uj = cache_norms(ratings_T, j, norm_U)

                    # Gradient Bu
                    reg_bu = (bias_lmbda /
                              norm_Iu) * bu[u] if norm_Iu > 0 else 0
                    dx_bu = euj + reg_bu

                    # Gradient Bi
                    reg_bi = (bias_lmbda /
                              norm_Uj) * bi[j] if norm_Uj > 0 else 0
                    dx_bi = euj + reg_bi

                    # Update the gradients Bu, Bi at the same time
                    update_bu(dx_bu, bu, u)
                    update_bj(dx_bi, bi, j)

                    # Gradient P
                    reg_p = (lmbda / norm_Iu) * P[u, :] if norm_Iu > 0 else 0
                    dx_pu = euj * Q[j, :] + reg_p
                    update_pu(dx_pu, P, u)

                    # Gradient Q
                    reg_q = (lmbda / norm_Uj) * Q[j, :] if norm_Uj > 0 else 0
                    dx_qi = euj * (P[u, :] + y_term + w_term) + reg_q
                    update_qj(dx_qi, Q, j)

                    # Gradient Y
                    if norm_Iu > 0:
                        tempY1 = (euj / norm_Iu) * Q[j, :]
                        norms = cache_norms(ratings_T, items_u, norm_U)
                        norm_b = (lmbda / np.atleast_2d(norms))
                        dx_yi = tempY1 + np.multiply(norm_b.T, Y[items_u, :])
                        update_yi(dx_yi, Y, items_u)

                    # Gradient W
                    if norm_Tu > 0:
                        tempW1 = (euj / norm_Tu) * Q[j, :]  # W: Part 1
                        norms = cache_norms(trust_T, trustees_u, norm_Tc)
                        norm_b = (lmbda / np.atleast_2d(norms))
                        dx_wv = tempW1 + np.multiply(norm_b.T,
                                                     W[trustees_u, :])
                        update_wv(dx_wv, W, trustees_u)

                # Optimize trust prediction
                for u, v in zip(*trust.nonzero()):

                    # Prediction and error
                    tuv_pred = np.dot(W[v, :], P[u, :])
                    euv = tuv_pred - trust[u, v]

                    # Gradient P (Part 2)
                    norm_Tu = cache_norms(trust, u, norm_Tr)
                    reg_p = P[u, :] / norm_Tu if norm_Tu > 0 else 0
                    dx_pu = social_lmbda * (euv * W[v, :] + reg_p)
                    update_pu(dx_pu, P, u)

                    # Gradient W (Part 2)
                    dx_wv = social_lmbda * euv * P[u, :]
                    update_wv(dx_wv, W, v)

                # Print process
                if verbose:
                    print('\t- Time: %.3fs' % (time.time() - start))

                    if verbose > 1:
                        # Set parameters and compute loss
                        data_t = (ratings, trust)
                        bias_t = (global_avg, bu, bi)
                        low_rank_matrices = (P, Q, Y, W)
                        params = (lmbda, bias_lmbda, social_lmbda)
                        objective = compute_loss(data_t, bias_t,
                                                 low_rank_matrices, params)

                        print('\t- Training loss: %.3f' % objective)
                    print('')

                # Send information about the process
                if callback:
                    callback(step + 1)

        except RuntimeWarning:
            if callback:
                callback(num_iter)
            raise RuntimeError('Training diverged and returned NaN.')

    return P, Q, Y, W, bu, bi, users_cache
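For TrustSVD, _predict also takes the trust factors and returns five values. A hedged sketch of the TrustSVD prediction rule, shaped to match the unpacking at the call sites (an assumption, not the verbatim helper):

import numpy as np

def _predict(u, j, global_avg, bu, bi, P, Q, Y, W, items_u, trustees_u):
    # TrustSVD: r_uj = mu + b_u + b_j + Q_j . (P_u
    #           + |I_u|^-1/2 * sum(Y_i, i in I_u)
    #           + |T_u|^-1/2 * sum(W_v, v in T_u))
    norm_Iu = np.sqrt(len(items_u))
    norm_Tu = np.sqrt(len(trustees_u))
    y_term = (Y[items_u, :].sum(axis=0) / norm_Iu
              if norm_Iu > 0 else np.zeros(Q.shape[1]))
    w_term = (W[trustees_u, :].sum(axis=0) / norm_Tu
              if norm_Tu > 0 else np.zeros(Q.shape[1]))
    ruj_pred = (global_avg + bu[u] + bi[j]
                + np.dot(Q[j, :], P[u, :] + y_term + w_term))
    return ruj_pred, y_term, w_term, norm_Iu, norm_Tu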
Example no. 13
def compute_loss(data, bias, low_rank_matrices, params):

    # Set parameters
    ratings, trust = data
    global_avg, bu, bi = bias
    P, Q, Y, W = low_rank_matrices
    lmbda, bias_lmbda, social_lmbda = params

    # Check ratings data type
    if isinstance(ratings, __sparse_format__):
        pass
    elif isinstance(ratings, Table):
        # Preprocess Orange.data.Table and transform it to sparse
        ratings, order, shape = preprocess(ratings)
        ratings = table2sparse(ratings, shape, order, m_type=__sparse_format__)
    else:
        raise TypeError('Invalid data type')

    # Check trust data type
    if isinstance(trust, (dict, __sparse_format__)):
        pass
    elif isinstance(trust, Table):
        # Preprocess Orange.data.Table and transform it to sparse
        trust, order, shape = preprocess(trust)
        trust = table2sparse(trust, shape, order, m_type=__sparse_format__)
    else:
        raise TypeError('Invalid data type')

    # Get featured matrices dimensions
    num_users, num_items = ratings.shape
    num_users = max(num_users, max(trust.shape))

    # Cache rows
    # >>> From 2 days to 30s
    users_cache = defaultdict(list)
    trusters_cache = defaultdict(list)

    # Cache norms (slower than list, but allows vectorization)
    # >>>  Lists: 6s; Arrays: 12s -> vectorized: 2s
    norm_I = np.zeros(num_users)  # norms of Iu
    norm_U = np.zeros(num_items)  # norms of Uj
    norm_Tr = np.zeros(num_users)  # norms of Tu
    norm_Tc = np.zeros(num_users)  # norms of Tv

    # Precompute transpose (most costly operation)
    ratings_T = ratings.T
    trust_T = trust.T

    # Loss from the rating predictions
    objective = 0
    for u, j in zip(*ratings.nonzero()):

        # Store lists in cache
        items_u = cache_rows(ratings, u, users_cache)
        trustees_u = cache_rows(trust, u, trusters_cache)

        # Prediction and error
        ruj_pred, _, _, norm_Iu, norm_Tu = \
            _predict(u, j, global_avg, bu, bi, P, Q, Y, W, items_u, trustees_u)

        # Cache norms
        norm_I[u] = norm_Iu
        norm_Tr[u] = norm_Tu

        # Compute loss
        objective += 0.5 * (ruj_pred - ratings[u, j])**2

    # Loss from the trust predictions
    for u, v in zip(*trust.nonzero()):
        # Prediction
        tuv_pred = np.dot(W[v, :], P[u, :])

        # Compute loss
        objective += social_lmbda * 0.5 * (tuv_pred - trust[u, v])**2

    for u in range(P.shape[0]):  # users
        # Cache norms
        norm_Iu = cache_norms(ratings, u, norm_I)
        norm_Tu = cache_norms(trust, u, norm_Tr)
        norm_Tv = cache_norms(trust_T, u, norm_Tc)

        # Compute loss
        term_l = 0
        if norm_Iu > 0:
            objective += bias_lmbda / (2 * norm_Iu) * bu[u]**2
            term_l = lmbda / (2 * norm_Iu)

        term_s = 0
        if norm_Tu > 0:
            term_s = social_lmbda / (2 * norm_Tu)

        term_ls = term_l + term_s
        if term_ls > 0:
            objective += term_ls * np.linalg.norm(P[u, :])**2

        if norm_Tv > 0:
            objective += lmbda / (2 * norm_Tv) * np.linalg.norm(W[u, :])**2

    for j in range(Q.shape[0]):  # items
        # Cache norms
        norm_Uj = cache_norms(ratings_T, j, norm_U)

        # Compute loss
        if norm_Uj > 0:
            objective += bias_lmbda / (2 * norm_Uj) * bi[j]**2
            objective += lmbda / (2 * norm_Uj) * np.linalg.norm(Q[j, :])**2
            objective += lmbda / (2 * norm_Uj) * np.linalg.norm(Y[j, :])**2

    return objective
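cache_norms is the array-backed counterpart of cache_rows. A sketch consistent with its use above, returning the square root of a row's nonzero count, memoized in a preallocated array, for either a single row index or an array of them (assumptions noted in the comments):

import numpy as np

def cache_norms(matrix, rows, cache):
    # Return sqrt(#nonzeros) of the requested row(s) of a row-sliceable
    # SciPy sparse matrix. 'cache' is assumed to be a preallocated float
    # array where 0 means "not computed yet"; 'rows' an int or int array.
    scalar_input = np.ndim(rows) == 0
    idx = np.atleast_1d(rows)
    for r in idx[cache[idx] == 0]:  # fill only the missing entries
        cache[r] = np.sqrt(matrix[r, :].getnnz())
    return float(cache[idx[0]]) if scalar_input else cache[idx]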