예제 #1
0
def test_fill_users():
    """Check that ``include_missing=True`` adds rows for users without lists.

    Recommendations are generated only for a sample drawn from the test
    users.  The filled-in result must have one row per test user, the
    default result must be strictly smaller, and recall must agree wherever
    the filled-in result has a value.
    """
    analysis = topn.RecListAnalysis()
    for metric in (topn.precision, topn.recall):
        analysis.add_metric(metric)

    knn = Recommender.adapt(UserUser(20, min_nbrs=10))

    train, test = next(xf.sample_users(ml_test.ratings, 1, 50, xf.SampleN(5)))
    knn.fit(train)

    # Draw 50 entries from the 'user' column, then de-duplicate them.
    sampled_users = test['user'].sample(50).unique()
    recs = batch.recommend(knn, sampled_users, 25)

    filled = analysis.compute(recs, test, include_missing=True)
    assert len(filled) == test['user'].nunique()
    assert filled['recall'].notna().sum() == len(sampled_users)
    assert all(filled['ntruth'] == 5)

    unfilled = analysis.compute(recs, test)
    assert len(unfilled) < len(filled)

    # Compare recall only where the filled-in result actually has a value.
    has_recall = filled['recall'].notna()
    known = filled.loc[has_recall, 'recall'].copy()
    known, expected = known.align(unfilled['recall'])
    assert all(known == expected)
예제 #2
0
def test_adv_fill_users():
    """Check ``include_missing=True`` with two algorithms over two partitions.

    For each partition a user-user and an item-item model are fitted and
    asked for recommendations for a sample drawn from that partition's test
    users.  The filled-in scores must contain 50 rows for each of the four
    (partition, algorithm) pairs, and must agree on recall with the default
    scores wherever the filled-in result has a value.
    """
    analysis = topn.RecListAnalysis()
    for metric in (topn.precision, topn.recall):
        analysis.add_metric(metric)

    user_knn = Recommender.adapt(UserUser(30, min_nbrs=10))
    item_knn = Recommender.adapt(ItemItem(20, min_nbrs=4))

    rec_frames = {}
    test_frames = {}
    partitions = xf.sample_users(ml_test.ratings, 2, 50, xf.SampleN(5))
    for part, (train, test) in enumerate(partitions, start=1):
        # Each model is fitted and gets its own independent user sample.
        for label, model in (('UU', user_knn), ('II', item_knn)):
            model.fit(train)
            chosen = test['user'].sample(50).unique()
            rec_frames[(part, label)] = batch.recommend(model, chosen, 25)
        test_frames[part] = test

    # Move the concat keys into ordinary columns and discard the old index.
    recs = pd.concat(rec_frames, names=['part', 'algo'])
    recs = recs.reset_index(['part', 'algo']).reset_index(drop=True)

    test = pd.concat(test_frames, names=['part'])
    test = test.reset_index(['part']).reset_index(drop=True)

    scores = analysis.compute(recs, test, include_missing=True)
    inames = scores.index.names
    scores = scores.sort_index()
    assert len(scores) == 50 * 4
    assert all(scores['ntruth'] == 5)
    assert scores['recall'].isna().sum() > 0
    _log.info('scores:\n%s', scores)

    per_algo = scores.reset_index().groupby('algo')['user']
    ucounts = per_algo.agg(['count', 'nunique'])
    assert all(ucounts['count'] == 100)
    assert all(ucounts['nunique'] == 100)

    # Re-index the default result like the filled-in one so they can align.
    mscores = analysis.compute(recs, test)
    mscores = mscores.reset_index().set_index(inames).sort_index()
    assert len(mscores) < len(scores)
    _log.info('mscores:\n%s', mscores)

    has_recall = scores['recall'].notna()
    recall = scores.loc[has_recall, 'recall'].copy()
    recall, mrecall = recall.align(mscores['recall'])
    assert all(recall == mrecall)
예제 #3
0
파일: Fallbacks.py 프로젝트: ordosnb/KNN
 def __init__(self, nnbrs, min_nbrs=1, min_sim=0, center=True,
              aggregate='weighted-average'):
     """Initialise the ``Fallback`` base with a ``UserUser`` and a ``Bias`` model.

     All five arguments are forwarded positionally, in the order given, to
     ``UserUser``.  The resulting model is listed first and a default
     ``Bias()`` second in the list handed to ``Fallback.__init__``.
     """
     members = [
         UserUser(nnbrs, min_nbrs, min_sim, center, aggregate),
         Bias(),
     ]
     Fallback.__init__(self, members)
 def __init__(self, mysql):
     """Read the movie and rating tables and fit a user-user recommender.

     Keeps ``mysql`` and the connection it returns, loads both tables via
     ``self.read_table``, renames their columns, and fits
     ``UserUser(15, min_nbrs=3)`` (wrapped by ``Recommender.adapt``) on the
     ratings.
     """
     self.mysql = mysql
     self.connection = mysql.get_connection()

     movie_query = """select * from movielenstable WHERE title IS NOT NULL AND genres IS NOT NULL;"""
     self.movies = self.read_table(movie_query)
     self.movies.columns = ['item', 'title', 'genres']

     rating_query = """select * from lensratings WHERE rating IS NOT NULL;"""
     self.ratings = self.read_table(rating_query)
     self.ratings.columns = ['user', 'item', 'rating']

     self.user_user = UserUser(15, min_nbrs=3)
     self.algorithm = Recommender.adapt(self.user_user)
     self.algorithm.fit(self.ratings)
예제 #5
0
    def predictRatingForUnseenMovies(self, userMovieRatings,
                                     predictConfigDict):
        """Recommend items for a user described only by ``userMovieRatings``.

        ``predictConfigDict`` is passed to ``self._validatePredictConfig``,
        which must return three values: the number of recommendations, and
        the maximum and minimum neighbour counts.

        Returns ``(False, None)`` if any of those three values is ``None``.
        Otherwise returns ``(True, recommendations)`` from a ``UserUser``
        model fitted on ``self.movieDataset.ratings``.
        """
        rec_count, max_neighbours, min_neighbours = \
            self._validatePredictConfig(predictConfigDict)

        if None in (rec_count, max_neighbours, min_neighbours):
            return False, None

        model = Recommender.adapt(
            UserUser(max_neighbours, min_nbrs=min_neighbours))
        model.fit(self.movieDataset.ratings)

        # The ratings are supplied at call time via the `ratings` argument.
        recommendations = model.recommend(self.NON_EXISTING_USER,
                                          rec_count,
                                          ratings=pd.Series(userMovieRatings))
        return True, recommendations
예제 #6
0
def default(implicit):
    """Return the default ``UserUser(30, ...)`` configuration.

    A truthy ``implicit`` selects ``aggregate='sum'`` with ``center=False``;
    otherwise only the first argument is given and the constructor's own
    defaults apply.
    """
    if not implicit:
        return UserUser(30)
    return UserUser(30, aggregate='sum', center=False)
예제 #7
0
def instantiate(opts, implicit):
    """Build a ``UserUser`` model from an options pair.

    ``opts`` unpacks into exactly two values: the first positional argument
    for ``UserUser`` and the value for ``min_sim``.  A truthy ``implicit``
    additionally passes ``aggregate='sum'`` and ``center=False``.
    """
    nnbrs, smin = opts
    extra = {'aggregate': 'sum', 'center': False} if implicit else {}
    return UserUser(nnbrs, min_sim=smin, **extra)
예제 #8
0
We're guiding how the algorithm decides whether a particular group of users should be clustered together by setting a minimum and maximum neighborhood size. These parameters modify the result of the algorithm.

Really small clusters represent groups of people who aren't very similar to a lot of others. So by keeping cluster size small, we'll see more unconventional recommendations. But increasing our minimum cluster size will probably give more conventionally popular recommendations.

Right now, we set the minimum to 3 and the maximum to 15, so the algorithm won't define a cluster unless it has at least 3 users, and it will use the 15 closest users (at most) to make rating predictions. 

**Step 4.1**
"""

from lenskit.algorithms import Recommender
from lenskit.algorithms.user_knn import UserUser

# Number of recommendations to generate; change this to see more or fewer.
num_recs = 10

# 15 is the maximum and 3 (min_nbrs) the minimum number of neighbors to
# consider. These are treated as "reasonable defaults", but you can
# experiment with other values.
user_user = UserUser(15, min_nbrs=3)
# Wrap the model with Recommender.adapt, then fit it on the ratings data.
algo = Recommender.adapt(user_user)
algo.fit(data.ratings)

print("Set up a User-User algorithm!")

"""Now that the system has defined clusters, we can give it our personal ratings to get the top 10 recommended movies for USER1 and USER2.

For each of us, the User-User algorithm will find a neighborhood of users similar to us based on their movie ratings. It will look at movies that these similar users have rated that we haven't seen yet. Based on their ratings, it will predict how we may rate that movie if we watched it. Finally, it will order these predictions and print them in descending order to give our "top 10."

**Step 4.2**
"""

# -1 tells the algorithm this is not an existing user in the data set and
# that new ratings are being supplied via `ratings`; num_recs is how many
# recommendations it should generate.
jabril_recs = algo.recommend(-1, num_recs, ratings=pd.Series(jabril_rating_dict))

# Add the 'genres' column from data.movies, joined on the 'item' column.
joined_data = jabril_recs.join(data.movies['genres'], on='item')
 def generate_model(data, min_neighbours, max_neighbours):
     """Fit and return a recommender built around a ``UserUser`` model.

     ``max_neighbours`` is given as the first argument of ``UserUser`` and
     ``min_neighbours`` as its ``min_nbrs``.  The model is wrapped by
     ``Recommender.adapt`` and fitted on ``data.ratings``.
     """
     recommender = Recommender.adapt(
         UserUser(max_neighbours, min_nbrs=min_neighbours))
     recommender.fit(data.ratings)
     return recommender
from lenskit.algorithms.user_knn import UserUser
from recsys.cf.usercf import UserCF
from recsys.utils.data import load_movielen_data
from recsys.utils.debug import Timer, LogUtil

# Timing script: fit the project's UserCF and LensKit's UserUser on the same
# ratings, then print the Timer reading after each predict_for_user call.
LogUtil.configLog()
ratings, users, movies = load_movielen_data()
# model0 is the project-local implementation; its thresholds (0.1, 5, 20)
# mirror the min_sim / min_nbrs / nnbrs values given to UserUser below.
model0 = UserCF(min_threshold=0.1, min_nn=5, max_nn=20)
model0.fit(ratings)

model = UserUser(nnbrs=20, min_nbrs=5, min_sim=0.1, center=False)
model.fit(ratings)

user = 1
# NOTE(review): the full item list built on the next line is replaced by [1]
# immediately afterwards, so only item 1 is ever scored. This looks like a
# debugging leftover; confirm which of the two assignments is intended.
movies = list(movies.item.astype(int))
movies = [1]
clock = Timer()
# Five UserUser predictions, printing clock.restart() after each one.
for _ in range(5):
    df = model.predict_for_user(user, movies)
    print(clock.restart())

print("=" * 60)

# The same five-call measurement for the project-local UserCF model.
for _ in range(5):
    df0 = model0.predict_for_user(user, movies)
    print(clock.restart())

# Summarise the result of the last call of each model for comparison.
print(df.describe())
print(df0.describe())