Ejemplo n.º 1
0
    def fit(self, X, y=None, columns=None):
        """Fit the model to a dataset.

        Parameters
        ----------
        X : pandas.DataFrame
            The input data, must contain the columns **user** and **item**. May contain the **score** column as well.
            The frame passed in is never modified: when a score column has to be added, it is added to a copy.
        y : pandas.Series or list
            The target values. If not set (and X doesn't contain the score column), it is assumed to be constant 1 (implicit recommendation).
        columns : dict
            Optionally the mapping of the input DataFrame's columns' names to the expected ones.
            ``None`` (the default) is treated as an empty mapping.

        Raises
        ------
        ValueError
            If ``y`` is given while ``X`` already contains a **score** column.

        """
        # NOTE(review): presumably opens the object-collection window that
        # rs.get_and_clean() closes further down -- confirm against rs.
        rs.collect()

        # Build the scored frame without touching the caller's DataFrame:
        # DataFrame.assign returns a new frame instead of writing into X.
        if y is None:
            if 'score' in X:
                data = X
            else:
                data = X.assign(score=np.ones(len(X)))
        elif 'score' in X:
            raise ValueError("y and score column both provided")
        else:
            data = X.assign(score=y)

        # A fresh dict per call avoids sharing one mutable default between
        # every invocation of fit().
        column_mapping = {} if columns is None else columns
        recommender_data = DataframeData(data, columns=column_mapping)

        matrix = recommender_data.get_full_matrix()
        users = rs.VectorInt([])
        items = rs.VectorInt([])
        # The two vectors are filled in place by recommender_data.
        recommender_data.get_users_into(users)
        recommender_data.get_items_into(items)

        (model, learner) = self._fit(recommender_data, users, items, matrix)

        # Initialize and self-test everything rs reports as created so far,
        # before any training happens.
        created_objects = rs.get_and_clean()
        rs.initialize_all(created_objects)
        for created in created_objects:
            rs.run_self_test(created)
        self.check_unused_parameters()

        learner.fit(recommender_data)

        # Keep the fitted state on the instance for later use.
        self.objects = created_objects
        self.model = model
        self.items = items
        self.users = users
        self.matrix = matrix
        self.recommender_data = recommender_data
Ejemplo n.º 2
0
    def recommend(self, users=None, k=100, exclude_known=True):
        """Compute top-k recommendation lists for a set of users.

        Parameters
        ----------
        users : list
            Users to produce toplists for. Duplicates are dropped. When
            ``None``, the users stored on the instance (``self.users``) are used.
        k : int
            Length of each toplist.
        exclude_known : bool
            Whether (user,item) pairs present in the train dataset are left
            out of the toplists.

        Returns
        -------
        pandas.DataFrame
          Recommendations with columns **user**, **item** and **rank**,
          sorted by user and then by rank.

        """
        rs.collect()

        # Filter restricted to the items and users stored on the instance.
        model_filter = rs.DummyModelFilter()
        model_filter.set_items(self.items)
        model_filter.set_users(self.users)

        # exclude_known is passed on to the toplist creator as lookback 1/0.
        if exclude_known:
            lookback = 1
        else:
            lookback = 0
        toplist_creator = rs.PredictionCreatorPersonalized(
            top_k=k, lookback=lookback)
        toplist_creator.set_filter(model_filter)
        toplist_creator.set_train_matrix(self.matrix)
        toplist_creator.set_model(self.model)

        ranker = rs.OfflineRankingComputer(top_k=k)
        ranker.set_items(self.items)
        if users is not None:
            # De-duplicate the requested users, keeping first-seen order.
            distinct_users = pd.Series(users).unique().tolist()
            target_users = rs.VectorInt(distinct_users)
        else:
            target_users = self.users
        ranker.set_users(target_users)
        ranker.set_toplist_creator(toplist_creator)

        # The collected objects are only self-tested here;
        # rs.initialize_all is not invoked on them.
        collected = rs.get_and_clean()
        for obj in collected:
            rs.run_self_test(obj)

        result = ranker.compute()
        frame = pd.DataFrame({
            'user': result.users,
            'item': result.items,
            'rank': result.ranks,
        })
        ordered = frame.sort_values(['user', 'rank'])
        return ordered[['user', 'item', 'rank']]