Ejemplo n.º 1
0
 def __init__(self, model, similarity, arg,
              items_selection_strategy=None, capper=True,
              with_preference=False):
     """
     Store the collaborators used by the recommender.

     Parameters
     ----------
     model: data model instance, stored as `self.model`.
     similarity: item similarity instance, stored as `self.similarity`.
     arg: accepted but not used by this constructor.
     items_selection_strategy: candidate item strategy; when None a
         fresh ItemsNeighborhoodStrategy() is created.
     capper: bool (default=True), stored as `self.capper`.
     with_preference: bool (default=False), accepted but not stored
         by this constructor.
     """
     self.similarity = similarity
     self.capper = capper
     self.model = model
     # Fall back to the neighborhood strategy only when none was given.
     self.items_selection_strategy = (
         ItemsNeighborhoodStrategy()
         if items_selection_strategy is None
         else items_selection_strategy)
Ejemplo n.º 2
0
 def __init__(self, model, similarity, items_selection_strategy=None,
              capper=True, with_preference=False):
     """
     Initialise the base recommender and store the collaborators.

     Parameters
     ----------
     model: data model instance, forwarded to ItemRecommender.__init__.
     similarity: item similarity instance, stored as `self.similarity`.
     items_selection_strategy: candidate item strategy; when None a
         fresh ItemsNeighborhoodStrategy() is created.
     capper: bool (default=True), stored as `self.capper`.
     with_preference: bool (default=False), forwarded to
         ItemRecommender.__init__.
     """
     ItemRecommender.__init__(self, model, with_preference)
     self.similarity = similarity
     self.capper = capper
     # Fall back to the neighborhood strategy only when none was given.
     self.items_selection_strategy = (
         ItemsNeighborhoodStrategy()
         if items_selection_strategy is None
         else items_selection_strategy)
Ejemplo n.º 3
0
Archivo: classes.py Proyecto: Zeag/crab
 def __init__(self, model, similarity, items_selection_strategy=None,
              capper=True, with_preference=False):
     """
     Initialise the base recommender and store the collaborators.

     Parameters
     ----------
     model: data model instance, forwarded to ItemRecommender.__init__.
     similarity: item similarity instance, stored as `self.similarity`.
     items_selection_strategy: candidate item strategy; when None a
         fresh ItemsNeighborhoodStrategy() is created.
     capper: bool (default=True), stored as `self.capper`.
     with_preference: bool (default=False), forwarded to
         ItemRecommender.__init__.
     """
     ItemRecommender.__init__(self, model, with_preference)
     self.similarity = similarity
     self.capper = capper
     # Fall back to the neighborhood strategy only when none was given.
     self.items_selection_strategy = (
         ItemsNeighborhoodStrategy()
         if items_selection_strategy is None
         else items_selection_strategy)
Ejemplo n.º 4
0
class ItemBasedRecommender(ItemRecommender):
    """
    Item Based Collaborative Filtering Recommender.


    Parameters
    -----------
    model: The data model instance that will be data source
         for the recommender.

    similarity: The Item Similarity instance that will be used to
        score the items that will be recommended.

    items_selection_strategy: The item candidates strategy that you
     can choose for selecting the possible items to recommend.
     default = ItemsNeighborhoodStrategy

    capper: bool (default=True)
        Cap the preferences with maximum and minimum preferences
        in the model.
    with_preference: bool (default=False)
        Return the recommendations with the estimated preferences if True.

    Attributes
    -----------
    `model`: The data model instance that will be data source
         for the recommender.

    `similarity`: The Item Similarity instance that will be used to
        score the items that will be recommended.

    `items_selection_strategy`: The item candidates strategy that you
         can choose for selecting the possible items to recommend.
         default = ItemsNeighborhoodStrategy

    `capper`: bool (default=True)
        Cap the preferences with maximum and minimum preferences
        in the model.
    `with_preference`: bool (default=False)
        Return the recommendations with the estimated preferences if True.

    Examples
    -----------
    >>> from scikits.crab.models.classes import MatrixPreferenceDataModel
    >>> from scikits.crab.recommenders.knn.classes import ItemBasedRecommender
    >>> from scikits.crab.similarities.basic_similarities import ItemSimilarity
    >>> from scikits.crab.recommenders.knn.item_strategies import ItemsNeighborhoodStrategy
    >>> from scikits.crab.metrics.pairwise import euclidean_distances
    >>> movies = {'Marcel Caraciolo': {'Lady in the Water': 2.5, \
     'Snakes on a Plane': 3.5, \
     'Just My Luck': 3.0, 'Superman Returns': 3.5, 'You, Me and Dupree': 2.5, \
     'The Night Listener': 3.0}, \
     'Paola Pow': {'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5, \
     'Just My Luck': 1.5, 'Superman Returns': 5.0, 'The Night Listener': 3.0, \
     'You, Me and Dupree': 3.5}, \
    'Leopoldo Pires': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.0, \
     'Superman Returns': 3.5, 'The Night Listener': 4.0}, \
    'Lorena Abreu': {'Snakes on a Plane': 3.5, 'Just My Luck': 3.0, \
     'The Night Listener': 4.5, 'Superman Returns': 4.0, \
     'You, Me and Dupree': 2.5}, \
    'Steve Gates': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0, \
     'Just My Luck': 2.0, 'Superman Returns': 3.0, 'The Night Listener': 3.0, \
     'You, Me and Dupree': 2.0}, \
    'Sheldom': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0, \
     'The Night Listener': 3.0, 'Superman Returns': 5.0, \
     'You, Me and Dupree': 3.5}, \
    'Penny Frewman': {'Snakes on a Plane':4.5,'You, Me and Dupree':1.0, \
    'Superman Returns':4.0}, \
    'Maria Gabriela': {}}
    >>> model = MatrixPreferenceDataModel(movies)
    >>> items_strategy = ItemsNeighborhoodStrategy()
    >>> similarity = ItemSimilarity(model, euclidean_distances)
    >>> recsys = ItemBasedRecommender(model, similarity, items_strategy)
    >>> #Return the recommendations for the given user.
    >>> recsys.recommend('Leopoldo Pires')
    ['Just My Luck', 'You, Me and Dupree']
    >>> #Return the 2 explanations for the given recommendation.
    >>> recsys.recommended_because('Leopoldo Pires', 'Just My Luck',2)
    ['The Night Listener', 'Superman Returns']

    Notes
    -----------
    This ItemBasedRecommender does not yet provide
    support for rescorer functions.

    References
    -----------
    Item-based collaborative filtering recommendation algorithms by Sarwar
    http://portal.acm.org/citation.cfm?id=372071

    """

    def __init__(self, model, similarity, items_selection_strategy=None,
                capper=True, with_preference=False):
        ItemRecommender.__init__(self, model, with_preference)
        self.similarity = similarity
        self.capper = capper
        if items_selection_strategy is None:
            self.items_selection_strategy = ItemsNeighborhoodStrategy()
        else:
            self.items_selection_strategy = items_selection_strategy

    def recommend(self, user_id, how_many=None, **params):
        '''
        Return a list of recommended items, ordered from most strongly
        recommend to least.

        Parameters
        ----------
        user_id: int or string
                 User for which recommendations are to be computed.
        how_many: int
                 Desired number of recommendations (default=None ALL)

        Returns
        -------
        The result of `_top_matches` over the candidate items of the user.
        '''
        # NOTE: **params is accepted but currently not applied; the
        # `self._set_params(**params)` call is disabled in this version.
        candidate_items = self.all_other_items(user_id)

        return self._top_matches(user_id, candidate_items, how_many)

    def estimate_preference(self, user_id, item_id, **params):
        '''
        Parameters
        ----------
        user_id: int or string
                 User for which recommendations are to be computed.

        item_id:  int or string
            ID of item for which wants to find the estimated preference.

        Returns
        -------
        Return an estimated preference if the user has not expressed a
        preference for the item, or else the user's actual preference for the
        item. If a preference cannot be estimated, returns NaN (np.nan).
        '''
        preference = self.model.preference_value(user_id, item_id)

        if not np.isnan(preference):
            return preference

        #TODO: It needs optimization
        prefs = self.model.preferences_from_user(user_id)

        if not self.model.has_preference_values():
            prefs = [(pref, 1.0) for pref in prefs]

        # Drop the target item once, up front, so that the preference
        # values and the similarities built below stay index-aligned.
        rated = [(to_item_id, pref) for to_item_id, pref in prefs
                 if to_item_id != item_id]

        similarities = np.array(
            [self.similarity.get_similarity(item_id, to_item_id)
             for to_item_id, pref in rated]).flatten()
        prefs = np.array([pref for to_item_id, pref in rated])

        # Undefined (NaN) similarities must be ignored in BOTH the weighted
        # sum and the normalising total; otherwise a single NaN similarity
        # turns the whole estimate into NaN.
        valid = ~np.isnan(similarities)

        #Throw out the estimate if it was based on no data points.
        if not valid.any():
            return np.nan

        prefs_sim = np.sum(prefs[valid] * similarities[valid])
        total_similarity = np.sum(similarities[valid])

        # A zero total weight cannot be used as a divisor.
        if total_similarity == 0.0:
            return np.nan

        estimated = prefs_sim / total_similarity

        if self.capper:
            # Clamp the estimate into the preference range of the model.
            max_p = self.model.maximum_preference_value()
            min_p = self.model.minimum_preference_value()
            if estimated > max_p:
                estimated = max_p
            elif estimated < min_p:
                estimated = min_p
        return estimated

    def all_other_items(self, user_id, **params):
        '''
        Parameters
        ----------
        user_id: int or string
                 User for which recommendations are to be computed.

        Returns
        ---------
        Return items in the `model` for which the user has not expressed
        the preference and could possibly be recommended to the user.

        '''
        return self.items_selection_strategy.candidate_items(user_id, \
                            self.model)

    def _top_matches(self, source_id, target_ids, how_many=None, **params):
        '''
        Parameters
        ----------
        target_ids: array of shape [n_target_ids]

        source_id: int or string
                item id to compare against.

        how_many: int
            Desired number of most top items to recommend (default=None ALL)

        Returns
        --------
        Return the top N matches
        It can be user_ids or item_ids. When `with_preference` is True
        each entry is an (id, estimated preference) tuple. An empty
        `target_ids` yields an empty numpy array.
        '''
        #Empty target_ids
        if target_ids.size == 0:
            return np.array([])

        estimate_preferences = np.vectorize(self.estimate_preference)

        preferences = estimate_preferences(source_id, target_ids)

        # Keep ids and values filtered by the same mask so that the sort
        # indices computed below address both arrays consistently.
        valid = ~np.isnan(preferences)
        preference_values = preferences[valid]
        target_ids = target_ids[valid]

        # Descending order of estimated preference.
        sorted_preferences = np.lexsort((preference_values,))[::-1]

        if how_many:
            sorted_preferences = sorted_preferences[0:how_many]

        if self.with_preference:
            # Index the filtered values: the sort indices refer to
            # `preference_values`, not to the unfiltered `preferences`.
            top_n_recs = [(target_ids[ind], preference_values[ind])
                          for ind in sorted_preferences]
        else:
            top_n_recs = [target_ids[ind] for ind in sorted_preferences]

        return top_n_recs

    def most_similar_items(self, item_id, how_many=None):
        '''
        Return the most similar items to the given item, ordered
        from most similar to least.

        Parameters
        -----------
        item_id:  int or string
            ID of item for which to find most similar other items

        how_many: int
            Desired number of most similar items to find (default=None ALL)
        '''
        old_how_many = self.similarity.num_best
        #+1 since it returns the identity.
        self.similarity.num_best = how_many + 1 \
                    if how_many is not None else None
        try:
            similarities = self.similarity[item_id]
        finally:
            # Always restore the shared similarity object, even when the
            # lookup raises.
            self.similarity.num_best = old_how_many

        return np.array([item for item, pref in similarities \
            if item != item_id and not np.isnan(pref)])

    def recommended_because(self, user_id, item_id, how_many=None, **params):
        '''
        Returns the items that were most influential in recommending a
        given item to a given user. In most implementations, this
        method will return items that the user prefers and that
        are similar to the given item.

        Parameters
        -----------
        user_id : int or string
            ID of the user who was recommended the item

        item_id: int or string
            ID of item that was recommended

        how_many: int
            Maximum number of items to return (default=None ALL)

        Returns
        ----------
        The list of items ordered from most influential in
        recommended the given item to least. When `with_preference` is
        True each entry is an (item id, user's preference) tuple.
        '''
        preferences = self.model.preferences_from_user(user_id)

        # Build (item, preference) pairs once, excluding the explained item
        # itself, so ids, preferences and similarities share one indexing.
        if self.model.has_preference_values():
            rated = [(it, pref) for it, pref in preferences
                     if it != item_id]
        else:
            rated = [(it, 1.0) for it in preferences if it != item_id]

        item_ids = np.array([it for it, pref in rated])
        prefs = np.array([pref for it, pref in rated])
        similarities = np.array(
            [self.similarity.get_similarity(item_id, it)
             for it, pref in rated]).flatten()

        # Apply the NaN mask to every parallel array; the sort indices
        # below are positions in the filtered arrays.
        valid = ~np.isnan(similarities)
        item_ids = item_ids[valid]
        prefs = prefs[valid]
        scores = prefs * (1.0 + similarities[valid])

        # Descending order of influence score.
        sorted_preferences = np.lexsort((scores,))[::-1]

        if how_many:
            sorted_preferences = sorted_preferences[0:how_many]

        if self.with_preference:
            top_n_recs = [(item_ids[ind], prefs[ind])
                          for ind in sorted_preferences]
        else:
            top_n_recs = [item_ids[ind] for ind in sorted_preferences]

        return top_n_recs
Ejemplo n.º 5
0
class ItemBasedRecommender(ItemRecommender):
    """
    Item Based Collaborative Filtering Recommender.


    Parameters
    -----------
    model: The data model instance that will be data source
         for the recommender.

    similarity: The Item Similarity instance that will be used to
        score the items that will be recommended.

    items_selection_strategy: The item candidates strategy that you
     can choose for selecting the possible items to recommend.
     default = ItemsNeighborhoodStrategy

    capper: bool (default=True)
        Cap the preferences with maximum and minimum preferences
        in the model.
    with_preference: bool (default=False)
        Return the recommendations with the estimated preferences if True.

    Attributes
    -----------
    `model`: The data model instance that will be data source
         for the recommender.

    `similarity`: The Item Similarity instance that will be used to
        score the items that will be recommended.

    `items_selection_strategy`: The item candidates strategy that you
         can choose for selecting the possible items to recommend.
         default = ItemsNeighborhoodStrategy

    `capper`: bool (default=True)
        Cap the preferences with maximum and minimum preferences
        in the model.
    `with_preference`: bool (default=False)
        Return the recommendations with the estimated preferences if True.

    Examples
    -----------
    >>> from scikits.crab.models.classes import MatrixPreferenceDataModel
    >>> from scikits.crab.recommenders.knn.classes import ItemBasedRecommender
    >>> from scikits.crab.similarities.basic_similarities import ItemSimilarity
    >>> from scikits.crab.recommenders.knn.item_strategies import ItemsNeighborhoodStrategy
    >>> from scikits.crab.metrics.pairwise import euclidean_distances
    >>> movies = {'Marcel Caraciolo': {'Lady in the Water': 2.5, \
     'Snakes on a Plane': 3.5, \
     'Just My Luck': 3.0, 'Superman Returns': 3.5, 'You, Me and Dupree': 2.5, \
     'The Night Listener': 3.0}, \
     'Paola Pow': {'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5, \
     'Just My Luck': 1.5, 'Superman Returns': 5.0, 'The Night Listener': 3.0, \
     'You, Me and Dupree': 3.5}, \
    'Leopoldo Pires': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.0, \
     'Superman Returns': 3.5, 'The Night Listener': 4.0}, \
    'Lorena Abreu': {'Snakes on a Plane': 3.5, 'Just My Luck': 3.0, \
     'The Night Listener': 4.5, 'Superman Returns': 4.0, \
     'You, Me and Dupree': 2.5}, \
    'Steve Gates': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0, \
     'Just My Luck': 2.0, 'Superman Returns': 3.0, 'The Night Listener': 3.0, \
     'You, Me and Dupree': 2.0}, \
    'Sheldom': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0, \
     'The Night Listener': 3.0, 'Superman Returns': 5.0, \
     'You, Me and Dupree': 3.5}, \
    'Penny Frewman': {'Snakes on a Plane':4.5,'You, Me and Dupree':1.0, \
    'Superman Returns':4.0}, \
    'Maria Gabriela': {}}
    >>> model = MatrixPreferenceDataModel(movies)
    >>> items_strategy = ItemsNeighborhoodStrategy()
    >>> similarity = ItemSimilarity(model, euclidean_distances)
    >>> recsys = ItemBasedRecommender(model, similarity, items_strategy)
    >>> #Return the recommendations for the given user.
    >>> recsys.recommend('Leopoldo Pires')
    ['Just My Luck', 'You, Me and Dupree']
    >>> #Return the 2 explanations for the given recommendation.
    >>> recsys.recommended_because('Leopoldo Pires', 'Just My Luck',2)
    ['The Night Listener', 'Superman Returns']

    Notes
    -----------
    This ItemBasedRecommender does not yet provide
    support for rescorer functions.

    References
    -----------
    Item-based collaborative filtering recommendation algorithms by Sarwar
    http://portal.acm.org/citation.cfm?id=372071

    """
    def __init__(self,
                 model,
                 similarity,
                 items_selection_strategy=None,
                 capper=True,
                 with_preference=False):
        ItemRecommender.__init__(self, model, with_preference)
        self.similarity = similarity
        self.capper = capper
        if items_selection_strategy is None:
            self.items_selection_strategy = ItemsNeighborhoodStrategy()
        else:
            self.items_selection_strategy = items_selection_strategy

    def recommend(self, user_id, how_many=None, **params):
        '''
        Return a list of recommended items, ordered from most strongly
        recommend to least.

        Parameters
        ----------
        user_id: int or string
                 User for which recommendations are to be computed.
        how_many: int
                 Desired number of recommendations (default=None ALL)
        params: extra keyword arguments, handed to `self._set_params`.

        Returns
        -------
        The result of `_top_matches` over the candidate items of the user.
        '''
        self._set_params(**params)

        candidate_items = self.all_other_items(user_id)

        return self._top_matches(user_id, candidate_items, how_many)

    def estimate_preference(self, user_id, item_id, **params):
        '''
        Parameters
        ----------
        user_id: int or string
                 User for which recommendations are to be computed.

        item_id:  int or string
            ID of item for which wants to find the estimated preference.

        Returns
        -------
        Return an estimated preference if the user has not expressed a
        preference for the item, or else the user's actual preference for the
        item. If a preference cannot be estimated, returns NaN (np.nan).
        '''
        preference = self.model.preference_value(user_id, item_id)

        if not np.isnan(preference):
            return preference

        #TODO: It needs optimization
        prefs = self.model.preferences_from_user(user_id)

        if not self.model.has_preference_values():
            prefs = [(pref, 1.0) for pref in prefs]

        # Drop the target item once, up front, so that the preference
        # values and the similarities built below stay index-aligned.
        rated = [(to_item_id, pref) for to_item_id, pref in prefs
                 if to_item_id != item_id]

        similarities = np.array(
            [self.similarity.get_similarity(item_id, to_item_id)
             for to_item_id, pref in rated]).flatten()
        prefs = np.array([pref for to_item_id, pref in rated])

        # Undefined (NaN) similarities must be ignored in BOTH the weighted
        # sum and the normalising total; otherwise a single NaN similarity
        # turns the whole estimate into NaN.
        valid = ~np.isnan(similarities)

        #Throw out the estimate if it was based on no data points.
        if not valid.any():
            return np.nan

        prefs_sim = np.sum(prefs[valid] * similarities[valid])
        total_similarity = np.sum(similarities[valid])

        # A zero total weight cannot be used as a divisor.
        if total_similarity == 0.0:
            return np.nan

        estimated = prefs_sim / total_similarity

        if self.capper:
            # Clamp the estimate into the preference range of the model.
            max_p = self.model.maximum_preference_value()
            min_p = self.model.minimum_preference_value()
            if estimated > max_p:
                estimated = max_p
            elif estimated < min_p:
                estimated = min_p
        return estimated

    def all_other_items(self, user_id, **params):
        '''
        Parameters
        ----------
        user_id: int or string
                 User for which recommendations are to be computed.

        Returns
        ---------
        Return items in the `model` for which the user has not expressed
        the preference and could possibly be recommended to the user.

        '''
        return self.items_selection_strategy.candidate_items(user_id, \
                            self.model)

    def _top_matches(self, source_id, target_ids, how_many=None, **params):
        '''
        Parameters
        ----------
        target_ids: array of shape [n_target_ids]

        source_id: int or string
                item id to compare against.

        how_many: int
            Desired number of most top items to recommend (default=None ALL)

        Returns
        --------
        Return the top N matches
        It can be user_ids or item_ids. When `with_preference` is True
        each entry is an (id, estimated preference) tuple. An empty
        `target_ids` yields an empty numpy array.
        '''
        #Empty target_ids
        if target_ids.size == 0:
            return np.array([])

        estimate_preferences = np.vectorize(self.estimate_preference)

        preferences = estimate_preferences(source_id, target_ids)

        # Keep ids and values filtered by the same mask so that the sort
        # indices computed below address both arrays consistently.
        valid = ~np.isnan(preferences)
        preference_values = preferences[valid]
        target_ids = target_ids[valid]

        # Descending order of estimated preference.
        sorted_preferences = np.lexsort((preference_values, ))[::-1]

        if how_many:
            sorted_preferences = sorted_preferences[0:how_many]

        if self.with_preference:
            # Index the filtered values: the sort indices refer to
            # `preference_values`, not to the unfiltered `preferences`.
            top_n_recs = [(target_ids[ind], preference_values[ind])
                          for ind in sorted_preferences]
        else:
            top_n_recs = [target_ids[ind] for ind in sorted_preferences]

        return top_n_recs

    def most_similar_items(self, item_id, how_many=None):
        '''
        Return the most similar items to the given item, ordered
        from most similar to least.

        Parameters
        -----------
        item_id:  int or string
            ID of item for which to find most similar other items

        how_many: int
            Desired number of most similar items to find (default=None ALL)
        '''
        old_how_many = self.similarity.num_best
        #+1 since it returns the identity.
        self.similarity.num_best = how_many + 1 \
                    if how_many is not None else None
        try:
            similarities = self.similarity[item_id]
        finally:
            # Always restore the shared similarity object, even when the
            # lookup raises.
            self.similarity.num_best = old_how_many

        return np.array([item for item, pref in similarities \
            if item != item_id and not np.isnan(pref)])

    def recommended_because(self, user_id, item_id, how_many=None, **params):
        '''
        Returns the items that were most influential in recommending a
        given item to a given user. In most implementations, this
        method will return items that the user prefers and that
        are similar to the given item.

        Parameters
        -----------
        user_id : int or string
            ID of the user who was recommended the item

        item_id: int or string
            ID of item that was recommended

        how_many: int
            Maximum number of items to return (default=None ALL)

        Returns
        ----------
        The list of items ordered from most influential in
        recommended the given item to least. When `with_preference` is
        True each entry is an (item id, user's preference) tuple.
        '''
        preferences = self.model.preferences_from_user(user_id)

        # Build (item, preference) pairs once, excluding the explained item
        # itself, so ids, preferences and similarities share one indexing.
        if self.model.has_preference_values():
            rated = [(it, pref) for it, pref in preferences
                     if it != item_id]
        else:
            rated = [(it, 1.0) for it in preferences if it != item_id]

        item_ids = np.array([it for it, pref in rated])
        prefs = np.array([pref for it, pref in rated])
        similarities = np.array(
            [self.similarity.get_similarity(item_id, it)
             for it, pref in rated]).flatten()

        # Apply the NaN mask to every parallel array; the sort indices
        # below are positions in the filtered arrays.
        valid = ~np.isnan(similarities)
        item_ids = item_ids[valid]
        prefs = prefs[valid]
        scores = prefs * (1.0 + similarities[valid])

        # Descending order of influence score.
        sorted_preferences = np.lexsort((scores, ))[::-1]

        if how_many:
            sorted_preferences = sorted_preferences[0:how_many]

        if self.with_preference:
            top_n_recs = [(item_ids[ind], prefs[ind])
                          for ind in sorted_preferences]
        else:
            top_n_recs = [item_ids[ind] for ind in sorted_preferences]

        return top_n_recs
Ejemplo n.º 6
0
class ItemBasedRecommender(ItemRecommender):
    """
    Item Based Collaborative Filtering Recommender.

    Parameters
    ----------
    model: data model instance, stored as `self.model`.
    similarity: item similarity instance used to score candidate items.
    arg: accepted for call compatibility; not used by this class.
    items_selection_strategy: candidate item strategy
        (default = ItemsNeighborhoodStrategy).
    capper: bool (default=True)
        Cap the estimated preferences with the maximum and minimum
        preferences in the model.
    with_preference: bool (default=False)
        Accepted but not stored; `_top_matches` in this class always
        returns (item, preference) tuples.
    """

    def __init__(self, model, similarity, arg, items_selection_strategy=None, capper=True, with_preference=False):
        # NOTE(review): ItemRecommender.__init__ is not invoked here;
        # `model` is assigned directly instead - confirm this is intended.
        self.similarity = similarity
        self.capper = capper
        self.model = model
        if items_selection_strategy is None:
            self.items_selection_strategy = ItemsNeighborhoodStrategy()
        else:
            self.items_selection_strategy = items_selection_strategy

    def recommended(self, user_id, how_many=None):
        """
        Return (item, estimated preference) tuples for `user_id`, best
        first, limited to `how_many` entries when it is given.
        """
        candidate_items = self.all_other_items(user_id)
        return self._top_matches(user_id, candidate_items, how_many)

    def estimate_preference(self, user_id, item_id):
        """
        Return the user's actual preference for `item_id` when the model
        has one, otherwise a similarity-weighted average of the user's
        other preferences. Returns NaN (np.nan) when no estimate can be
        made.
        """
        preference = self.model.preference_value(user_id, item_id)

        if not np.isnan(preference):
            return preference

        # Fetch the items the user has already rated.
        prefs = self.model.preferences_from_user(user_id)

        # Drop the target item once, up front, so that the preference
        # values and the similarities built below stay index-aligned.
        rated = [(to_item_id, pref) for to_item_id, pref in prefs
                 if to_item_id != item_id]

        # Similarity between the target item and each rated item.
        similarities = np.array(
            [self.similarity.get_similarity(item_id, to_item_id)
             for to_item_id, pref in rated]).flatten()

        prefs = np.array([pref for it, pref in rated])

        # Undefined (NaN) similarities must be ignored in BOTH the weighted
        # sum and the normalising total; otherwise a single NaN similarity
        # turns the whole estimate into NaN.
        valid = ~np.isnan(similarities)
        if not valid.any():
            return np.nan

        prefs_sim = np.sum(prefs[valid] * similarities[valid])
        total_similarity = np.sum(similarities[valid])

        # A zero total weight cannot be used as a divisor.
        if total_similarity == 0.0:
            return np.nan

        estimated = prefs_sim / total_similarity

        if self.capper:
            # Clamp the estimate into the preference range of the model.
            max_p = self.model.maximum_preference_value()
            min_p = self.model.minimum_preference_value()
            if estimated > max_p:
                estimated = max_p
            elif estimated < min_p:
                estimated = min_p
        return estimated


    def all_other_items(self, user_id):
        """
        Return the candidate items produced by the configured selection
        strategy for `user_id`.
        """
        return self.items_selection_strategy.candidate_items(user_id, self.model)


    def _top_matches(self, source_id, target_ids, how_many=None):
        """
        Estimate the preference of `source_id` for every id in
        `target_ids`, drop the NaN estimates and return
        (id, preference) tuples sorted from highest to lowest
        preference. An empty `target_ids` yields an empty numpy array.
        """
        if target_ids.size == 0:
            return np.array([])
        preferences = np.array([self.estimate_preference(source_id, target_id) for target_id in target_ids])
        # Filter ids and values with the same mask so the sort indices
        # below address both arrays consistently.
        valid = ~np.isnan(preferences)
        preference_values = preferences[valid]
        target_ids = target_ids[valid]
        # Descending order of estimated preference.
        sorted_preferences = np.lexsort((preference_values,))[::-1]
        if how_many:
            sorted_preferences = sorted_preferences[0:how_many]
        return [(target_ids[ind], preference_values[ind])
                for ind in sorted_preferences]