Ejemplo n.º 1
0
def run_model(personality):
    """Fit an SVD model on the stored data plus one new user and return the top 3 predictions.

    The rows returned by ``json_to_pandas()`` are combined with the rows that
    ``user_to_dfrows`` builds for ``personality``. The ``user``, ``trait`` and
    ``percentile`` columns are then fed to surprise as (user, item, rating)
    triples on a 0.0-1.0 rating scale.

    Args:
        personality: Passed through unchanged to ``user_to_dfrows``; its
            expected structure is defined by that helper.

    Returns:
        A list of at most three ``(estimate, trait)`` tuples, ordered from the
        highest estimate to the lowest.
    """
    import heapq

    reader = Reader(sep=',', skip_lines=0, rating_scale=(0.0, 1.0))
    known_rows = pd.DataFrame(json_to_pandas())
    user_rows = pd.DataFrame(
        user_to_dfrows(len(known_rows.index), personality))

    # DataFrame.append was removed in pandas 2.0; concat without ignore_index
    # keeps the same (non-reset) index the old append call produced.
    ratings = pd.concat([known_rows, user_rows])
    data = Dataset.load_from_df(ratings[['user', 'trait', 'percentile']],
                                reader=reader)

    trainset = data.build_full_trainset()
    algo = SVD()
    algo.fit(trainset)

    # The anti-testset holds every (user, item) pair that has no known rating.
    # NOTE(review): this covers ALL users, so the three best estimates are not
    # guaranteed to belong to the newly added user - confirm this is intended.
    predictions = algo.test(trainset.build_anti_testset())

    # nlargest consumes any iterable directly; no prior heapify is required.
    return heapq.nlargest(3, ((p.est, p.iid) for p in predictions))
Ejemplo n.º 2
0
def movie_rater(movie_df, num=5, genre=None):
    """Handle a cold start: collect ratings from a new user, then print 5 recommendations.

    Movies are sampled one at a time from the module-level
    ``popular_movies_df`` (optionally restricted to ``genre``) until the user
    has supplied ``num`` valid ratings. Those ratings are added to the
    module-level ``ratings_df``, an SVD model is fitted on the combined data,
    and the five movies with the highest predicted rating are printed.

    Args:
        movie_df (dataframe): frame with ``movieId`` and ``title`` columns,
            used to look up the titles of the recommended movies.
        num (integer): number of ratings to collect before recommending.
            The default value is 5.
        genre (string): if given, only movies whose ``genres`` column contains
            this text are offered for rating. The default is None.

    Returns:
        None. The recommendations are printed, not returned.
    """
    import pandas as pd

    userID = 1000
    rating_list = []

    # The genre filter does not change between prompts, so apply it once.
    candidates = popular_movies_df
    if genre:
        candidates = popular_movies_df[
            popular_movies_df['genres'].str.contains(genre)]

    while num > 0:
        movie = candidates.sample(1)
        print(movie['title'])
        rating = input(
            'How do you rate this movie on a scale of (low)1-5(high). Press n if you have not seen this movie: \n'
        ).strip()
        if rating.lower() == 'n':
            continue

        # Only a failed numeric conversion is tolerated here; Ctrl-C and EOF
        # propagate instead of being swallowed into an endless prompt loop.
        try:
            rating_value = float(rating)
        except ValueError:
            print('Please enter a number from 1 to 5, or n.')
            continue
        if not 1 <= rating_value <= 5:
            print('Please enter a number from 1 to 5, or n.')
            continue

        rating_list.append({
            'userId': userID,
            'movieId': movie['movieId'].values[0],
            # Stored as a number so the rating column stays numeric.
            'rating': rating_value,
        })
        num -= 1

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    new_ratings_df = pd.concat([ratings_df, pd.DataFrame(rating_list)],
                               ignore_index=True)
    new_data = Dataset.load_from_df(new_ratings_df, reader)
    svd_ = SVD(n_factors=100, n_epochs=30, lr_all=0.01, reg_all=0.1)
    svd_.fit(new_data.build_full_trainset())

    # Score every known movie for the new user and rank by estimated rating.
    list_of_movies = [(m_id, svd_.predict(userID, m_id).est)
                      for m_id in ratings_df['movieId'].unique()]
    ranked_movies = sorted(list_of_movies, key=lambda x: x[1], reverse=True)

    for idx, rec in enumerate(ranked_movies[:5], start=1):
        title = movie_df.loc[movie_df['movieId'] == int(rec[0])]['title']
        print('------------------------------------------------')
        print('Recommendation # ', idx, ': ', title, '\n')

    return
def initialize_and_fit_model(data):
    """
    Create the SVD model used by the program and train it on every rating
    contained in ``data`` (which should already include the new user data).

    returns:
     - the value returned by ``fit`` on that model, i.e. the model trained
       on the full trainset built from ``data``
    """

    from surprise.prediction_algorithms import SVD

    model = SVD(n_factors=50, reg_all=0.05)
    full_trainset = data.build_full_trainset()
    return model.fit(full_trainset)
    def singular_value_decomposition(self, n_factors, reg_all):
        """Rank URLs for the current user with an SVD model and record the result.

        An SVD model built with ``n_factors`` and ``reg_all`` is fitted on the
        ``User``/``URL``/``Rating`` columns of the current utility matrix
        (rating scale 1-5). Every distinct remaining row is then scored for
        ``self.current_user`` and the URLs are ordered from the highest to
        the lowest estimate.

        Returns whatever ``self.append_new_recommendation`` returns for the
        ranked URLs, labelled 'Singular Value Decomposition'.
        """
        utility = self.current_utility_matrix()

        # Train on every known rating; no hold-out split is made here.
        rating_reader = Reader(rating_scale=(1, 5))
        ratings = Dataset.load_from_df(utility[['User', 'URL', 'Rating']],
                                       rating_reader)
        full_trainset = ratings.build_full_trainset()
        model = SVD(n_factors=n_factors, reg_all=reg_all)
        model.fit(full_trainset)

        def estimate_for(url):
            # Estimated rating of ``url`` for the local user.
            return model.predict(self.current_user, url).est

        # One row per distinct combination of the remaining (non user/rating) columns.
        candidates = utility.drop(columns=['User', 'Rating']).drop_duplicates()
        candidates['SVD'] = candidates['URL'].apply(estimate_for)
        ranked_urls = candidates.sort_values(by='SVD', ascending=False)['URL']

        return self.append_new_recommendation(ranked_urls,
                                              'Singular Value Decomposition')
    # Mean absolute error.
    mae = accuracy.mae(predictions)

    df_predicted = pd.DataFrame(columns=["uid", "iid", "predicted", "actual"])
    for prediction in predictions:
        df_predicted = df_predicted.append(
            {
                "uid": prediction.uid,
                "iid": prediction.iid,
                "predicted": prediction.est,
                "actual": df_swipes[prediction.uid].loc[prediction.iid]
            },
            ignore_index=True
        )

    acc_dict = {"algname": algname, "n_train": n_train, "n_users": n_users, "acc": mae}
    print(acc_dict)
    return acc_dict


if __name__ == "__main__":
    # Save accuracy data for all swipe values and user values.
    acc_columns = ["algname", "n_train", "n_users", "acc"]
    df_acc = pd.DataFrame(columns=acc_columns)
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame on
    # every call; collect the result dicts and rebuild the frame when saving.
    acc_rows = []

    max_swipes = 210
    # A list (instead of a zip iterator) lets tqdm know the total to display.
    algorithms = [(SVD(), "SVD"), (NMF(), "NMF"), (KNNWithMeans(), "KNNWithMeans")]
    for alg, algname in tqdm(algorithms):
        for n_users in range(2, len(users)):
            acc_rows.extend(
                acc(df_swipes, alg, algname, n_train, n_users)
                for n_train in range(10, max_swipes, 10)
            )
            # The CSV is rewritten after every n_users value, as before.
            df_acc = pd.DataFrame(acc_rows, columns=acc_columns)
            df_acc.to_csv("acc_organic.csv", index=False)
                "iid": prediction.iid,
                "predicted": prediction.est,
                "actual": df_swipes[prediction.uid].loc[prediction.iid]
            },
            ignore_index=True)

    acc_dict = {
        "algname": algname,
        "n_train": n_train,
        "n_users": n_users,
        "acc": mae
    }
    print(acc_dict)
    return acc_dict


if __name__ == "__main__":
    # Save accuracy data for all swipe values and user values.
    acc_columns = ["algname", "n_train", "n_users", "acc"]
    df_acc = pd.DataFrame(columns=acc_columns)
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame on
    # every call; collect the result dicts and rebuild the frame when saving.
    acc_rows = []

    max_swipes = 210
    # A list (instead of a zip iterator) lets tqdm know the total to display.
    algorithms = [(SVD(), "SVD"), (NMF(), "NMF"),
                  (KNNWithMeans(), "KNNWithMeans")]
    for alg, algname in tqdm(algorithms):
        for n_users in [5, 10, 25, 50, 100, 250, 500, 1000]:
            acc_rows.extend(
                acc(df_swipes, alg, algname, n_train, n_users)
                for n_train in range(10, max_swipes, 10)
            )
            # The CSV is rewritten after every n_users value, as before.
            df_acc = pd.DataFrame(acc_rows, columns=acc_columns)
            df_acc.to_csv("acc.csv", index=False)
Ejemplo n.º 7
0
def test_SVD_parameters():
    """Check that each SVD constructor parameter changes the resulting RMSE.

    A baseline model (``n_factors=1, n_epochs=1``) is evaluated once; then one
    parameter at a time is overridden and the RMSE reported by ``evaluate``
    must differ from the baseline.
    """

    def rmse_for(**params):
        # RMSE reported by ``evaluate`` for an SVD built with ``params``.
        return evaluate(SVD(**params), data, measures=['rmse'])['rmse']

    baseline = {'n_factors': 1, 'n_epochs': 1}

    # The baseline against which to compare.
    rmse_default = rmse_for(**baseline)

    # One override per check, in the order the parameters are exercised.
    overrides = [
        {'n_factors': 2},
        {'n_epochs': 2},
        {'lr_all': 5},
        {'reg_all': 5},
        {'lr_bu': 5},
        {'lr_bi': 5},
        {'lr_pu': 5},
        {'lr_qi': 5},
        {'reg_bu': 5},
        {'reg_bi': 5},
        {'reg_pu': 5},
        {'reg_qi': 5},
    ]
    for override in overrides:
        rmse_changed = rmse_for(**dict(baseline, **override))
        assert rmse_default != rmse_changed
Ejemplo n.º 8
0
"""Recommendations using the surprise SVD family of algorithms."""

from surprise import Dataset
from surprise import Reader
from surprise.prediction_algorithms import SVD ,SVDpp  
from surprise import accuracy
from surprise.model_selection import KFold
import pandas as pd
import  os

# Load ratings.csv as "user,item,rating" rows; skip_lines=1 skips its first line.
reader = Reader(line_format='user item rating', sep=',', skip_lines=1)
data = Dataset.load_from_file("./ratings.csv", reader=reader)
from surprise.model_selection import train_test_split

# Hold out 20% of the ratings for testing; the fixed seed makes the split repeatable.
x_train, x_test = train_test_split(data, test_size=0.2, random_state=10000)
svd = SVD(biased=False)
svd.fit(x_train)

# Score the held-out ratings and report the RMSE.
prediction = svd.test(x_test)
accuracy.rmse(predictions=prediction)
# Output recorded from an earlier run:
# RMSE: 0.8548
# 0.8547798833361556

import pandas as pd
import numpy as np

# Read the same file with pandas: first three columns only, header row skipped.
datas = pd.read_csv(
    "ratings.csv",
    delimiter=",",
    skiprows=1,
    names=["user", "item", "rating"],
    usecols=[0, 1, 2],
)
# NOTE(review): casting "rating" to int32 drops any fractional part - confirm
# that the ratings in this file are whole numbers.
datas = datas.astype({
    "user": np.int32,
    "item": np.int32,
    "rating": np.int32,
})
print(datas.dtypes)
Ejemplo n.º 9
0
from surprise.model_selection import cross_validate, train_test_split
from surprise.prediction_algorithms import SVD
from surprise import accuracy
# Module-level data and model setup. popular_movies_df, ratings_df and reader
# are read as globals by movie_rater() below.
popular_movies_df = pd.read_csv('popular_movies.csv')
# NOTE(review): usecols selects columns but does not reorder them, and
# load_from_df is given the frame as-is - this presumably relies on the file
# storing userId, movieId, rating in that order; confirm.
ratings_df = pd.read_csv('ratings_limited_users.csv',
                         usecols=['userId', 'movieId', 'rating'])
movies_df = pd.read_csv('movies.csv')
# Initializing a reader (library default settings) and the surprise dataset
# built from the ratings frame
reader = Reader()
data = Dataset.load_from_df(ratings_df, reader)

# Splitting the data into train and test sets (25% of ratings held out)
trainset, testset = train_test_split(data, test_size=.25)

# Using the tuned parameters for the SVD model: fit on the training split,
# then predict the held-out ratings
svd = SVD(n_factors=100, n_epochs=30, lr_all=0.01, reg_all=0.1)
svd.fit(trainset)
svd_preds = svd.test(testset)


# Function to get new users preferences on any movie or a particular genre
def movie_rater(movie_df, num=5, genre=None):
    """ This function is to handle a cold start with a new user.  It takes in a number of ratings
        from a new user and gives the output of 5 movie recommendations.
        
        Args:
            movie_df(dataframe): the dataframe of movies that you will use to recommend movies
            num(integer): the number of ratings you want the user to input before giving a recommendation. The default value is 5.
            genre(string): The genre of movies that you wish to pull from for your user to rate.  The default is None.
        
        Returns:
Ejemplo n.º 10
0
        rating_one_movie = {
            'userId': 1000,
            'movieId': rating_movie['movieId'].values[0],
            'rating': float(rating) / 2
        }
        rating_list.append(rating_one_movie)
        n -= 1

# Make Predictions
import pandas as pd  # local import so the concat call below is self-contained

reader = Reader()
# DataFrame.append was removed in pandas 2.0; concat is the replacement for
# adding the newly collected rating dicts to the existing ratings frame.
new_ratings = pd.concat([ratings, pd.DataFrame(rating_list)],
                        ignore_index=True)
data = Dataset.load_from_df(new_ratings, reader).build_full_trainset()

# Model
print('\n working.... \n')
svd = SVD(n_factors=100, n_epochs=35, lr_all=0.007, reg_all=0.07)
svd.fit(data)

# Gather and sort recommendations.
# The estimate for user 1000 is doubled for every movie, mirroring the
# `float(rating) / 2` applied when the user's ratings were collected.
recommendation_list = [(m_id, 2 * svd.predict(1000, m_id).est)
                       for m_id in movies['movieId']]

ranked_recommendations = sorted(recommendation_list,
                                key=lambda x: x[1],
                                reverse=True)

# Deliver Results
print('\n', 'Success!', '\n')
X = int(input('How many movie recommendations would you like to see? '))
Ejemplo n.º 11
0
# Build a surprise dataset from the `sparse` frame with ratings on a 0-1 scale.
reader = surprise.Reader(rating_scale=(0, 1))
data = surprise.Dataset.load_from_df(sparse, reader)
# The string literal below is disabled code kept as a no-op expression: it
# fitted each algorithm on the full trainset and ranked the ids in
# complement_ids[name] for every name by estimated rating.
'''
for alg in [SVD(), NMF(), KNNWithMeans()]:
    output = alg.fit(data.build_full_trainset())

    preds={}
    for name in names:
        preds[name] = sorted([(i, alg.predict(uid=name, iid=str(i)).est) for i in complement_ids[name]], key=lambda x: x[1], reverse=True)

    print(preds)
'''

# Threshold used by the loop below to turn each estimate into a binary value
# via int(est < cutoff).
cutoff = .5
# Hold out 25% of the ratings for testing.
trainset, testset = train_test_split(data, test_size=0.25)
for alg in [SVD(), NMF(), KNNWithMeans()]:
    alg.fit(trainset)
    predictions = alg.test(testset)

    # Change predictions to binary choice of left or right. Prediction class derives from NamedTuple.
    predictions = [
        Prediction(prediction.uid, prediction.iid, prediction.r_ui,
                   int(prediction.est < cutoff), prediction.details)
        for prediction in predictions
    ]
    # print(predictions)
    accuracy.mae(predictions)

    df_predicted = pd.DataFrame(columns=["uid", "iid", "predicted", "actual"])
    for prediction in predictions:
        df_predicted = df_predicted.append(