예제 #1
0
def run_model(personality):
    """Fit an SVD model on stored data plus one new user; return the top 3 estimates.

    The existing rows come from ``json_to_pandas()`` and the new user's rows
    from ``user_to_dfrows(len(df.index), personality)``; both helpers are
    defined elsewhere. The ``user``, ``trait`` and ``percentile`` columns are
    fed to Surprise with a rating scale of 0.0-1.0.

    Args:
        personality: Forwarded unchanged to ``user_to_dfrows`` to build the
            new user's rows.

    Returns:
        A list of at most three ``(estimate, trait)`` tuples, ordered from
        the highest estimate down.
    """
    import heapq

    reader = Reader(sep=',', skip_lines=0, rating_scale=(0.0, 1.0))
    df = pd.DataFrame(json_to_pandas())

    new = pd.DataFrame(user_to_dfrows(len(df.index), personality))
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # equivalent and keeps the original (non-reset) index behaviour.
    df = pd.concat([df, new])
    data = Dataset.load_from_df(df[['user', 'trait', 'percentile']],
                                reader=reader)

    trainset = data.build_full_trainset()
    algo = SVD()
    algo.fit(trainset)

    # Predict every (user, trait) pair that is absent from the training data.
    # NOTE(review): this covers all users, not only the newly added one, so
    # the top 3 below may belong to other users - confirm that is intended.
    testset = trainset.build_anti_testset()
    predictions = algo.test(testset)

    # nlargest accepts any iterable, so no intermediate heap is needed.
    return heapq.nlargest(3, ((p.est, p.iid) for p in predictions))
예제 #2
0
def movie_rater(movie_df, num=5, genre=None):
    """Handle a cold start: collect ratings from a new user, then print 5 picks.

    Movies to rate are sampled one at a time from the module-level
    ``popular_movies_df``. The collected ratings are added to the module-level
    ``ratings_df`` under user id 1000, an SVD model is fitted on the combined
    data, and the five movies with the highest predicted rating are printed.

    Args:
        movie_df (DataFrame): Used to look up the ``title`` of each
            recommended ``movieId``.
        num (int): Number of valid ratings to collect before recommending.
            Defaults to 5.
        genre (str): If given, only movies whose ``genres`` column matches
            this pattern (via ``str.contains``) are offered for rating.
            Defaults to None.

    Returns:
        None. The recommendations are printed, not returned.
    """
    import pandas as pd

    userID = 1000
    rating_list = []
    while num > 0:
        if genre:
            movie = popular_movies_df[popular_movies_df['genres'].str.contains(
                genre)].sample(1)
        else:
            movie = popular_movies_df.sample(1)
        print(movie['title'])
        rating = input(
            'How do you rate this movie on a scale of (low)1-5(high). Press n if you have not seen this movie: \n'
        )
        if rating == 'n':
            continue
        # Validate here so bad input re-prompts instead of failing later
        # during dataset loading. Only ValueError is caught, so Ctrl-C and
        # EOF still terminate the loop.
        try:
            rating_value = float(rating)
        except ValueError:
            print('Please enter a number from 1 to 5, or n.')
            continue
        if not 1 <= rating_value <= 5:
            print('Please enter a number from 1 to 5, or n.')
            continue
        rating_list.append({
            'userId': userID,
            'movieId': movie['movieId'].values[0],
            'rating': rating_value
        })
        num -= 1

    # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
    new_ratings_df = pd.concat([ratings_df, pd.DataFrame(rating_list)],
                               ignore_index=True)
    new_data = Dataset.load_from_df(new_ratings_df, reader)
    svd_ = SVD(n_factors=100, n_epochs=30, lr_all=0.01, reg_all=0.1)
    svd_.fit(new_data.build_full_trainset())

    # Index 3 of a Surprise prediction is the estimated rating.
    list_of_movies = [(m_id, svd_.predict(userID, m_id)[3])
                      for m_id in ratings_df['movieId'].unique()]
    ranked_movies = sorted(list_of_movies, key=lambda x: x[1], reverse=True)

    for idx, rec in enumerate(ranked_movies[:5]):
        title = movie_df.loc[movie_df['movieId'] == int(rec[0])]['title']
        print('------------------------------------------------')
        print('Recommendation # ', idx + 1, ': ', title, '\n')

    return
def initialize_and_fit_model(data):
    """
    Create the SVD model used by the program and train it on all of ``data``.

    The full training set is built from ``data`` (which is expected to
    already include the new user's ratings) and an SVD with 50 factors and
    a regularization term of 0.05 is fitted on it.

    returns:
     - whatever ``SVD.fit`` returns for the model trained on the full data
    """
    from surprise.prediction_algorithms import SVD

    model = SVD(n_factors=50, reg_all=0.05)
    full_trainset = data.build_full_trainset()
    fitted = model.fit(full_trainset)
    return fitted
    def singular_value_decomposition(self, n_factors, reg_all):
        """Rank URLs for the current user with SVD and record the ranking.

        An SVD model is trained on the ``User``/``URL``/``Rating`` columns of
        the current utility matrix (rating scale 1-5). Each distinct row left
        after dropping ``User`` and ``Rating`` is then scored for
        ``self.current_user`` and the URLs are ordered by that score,
        highest first.

        Args:
            n_factors: Passed to ``SVD`` as ``n_factors``.
            reg_all: Passed to ``SVD`` as ``reg_all``.

        Returns:
            The result of ``self.append_new_recommendation`` called with the
            ranked URLs and the label 'Singular Value Decomposition'.
        """
        utility = self.current_utility_matrix()

        # Train on every known rating.
        rating_reader = Reader(rating_scale=(1, 5))
        full_trainset = Dataset.load_from_df(
            utility[['User', 'URL', 'Rating']], rating_reader
        ).build_full_trainset()
        model = SVD(n_factors=n_factors, reg_all=reg_all)
        model.fit(full_trainset)

        def estimate_for_current_user(url):
            # Index 3 of a prediction is the estimated rating.
            return model.predict(self.current_user, url)[3]

        # Score each distinct remaining row for the local user.
        candidates = utility.drop(['User', 'Rating'], axis=1).drop_duplicates()
        candidates['SVD'] = candidates['URL'].apply(estimate_for_current_user)
        ranked_urls = candidates.sort_values(by='SVD', ascending=False)['URL']

        return self.append_new_recommendation(
            ranked_urls, 'Singular Value Decomposition')
예제 #5
0
from surprise import Dataset
from surprise import Reader
from surprise.prediction_algorithms import SVD ,SVDpp  
from surprise import accuracy
from surprise.model_selection import KFold
import pandas as pd
import  os

from surprise.model_selection import train_test_split

# Read ./ratings.csv as comma-separated "user item rating" rows, skipping
# the first line of the file.
reader = Reader(line_format='user item rating', sep=',', skip_lines=1)
data = Dataset.load_from_file("./ratings.csv", reader=reader)

# Hold out 20% of the ratings for evaluation; the seed keeps the split fixed.
x_train, x_test = train_test_split(data, test_size=0.2, random_state=10000)

# Fit SVD with biased=False on the training part.
svd = SVD(biased=False)
svd.fit(x_train)

# Score the held-out part and report the RMSE.
prediction = svd.test(x_test)
accuracy.rmse(predictions=prediction)
# Output recorded by the original author:
# RMSE: 0.8548
# 0.8547798833361556

import pandas as pd 
import numpy as np
# Load the first three columns of ratings.csv as user / item / rating,
# skipping the first line of the file.
datas = pd.read_csv(
    "ratings.csv",
    delimiter=",",
    skiprows=1,
    names=["user", "item", "rating"],
    usecols=[0, 1, 2],
)

# Cast all three columns to 32-bit integers.
# NOTE(review): if the file contains fractional ratings (e.g. 3.5), this cast
# truncates them - confirm the ratings are whole numbers.
datas = datas.astype({
    "user": np.int32,
    "item": np.int32,
    "rating": np.int32,
})
print(datas.dtypes)

reader = Reader(line_format='user item rating')
예제 #6
0
from surprise.prediction_algorithms import SVD
from surprise import accuracy
# Load the three CSV inputs. These module-level frames are read by
# movie_rater(): popular_movies_df is the pool sampled for the user to rate,
# ratings_df is the existing ratings table (only userId, movieId and rating
# are kept).
popular_movies_df = pd.read_csv('popular_movies.csv')
ratings_df = pd.read_csv('ratings_limited_users.csv',
                         usecols=['userId', 'movieId', 'rating'])
movies_df = pd.read_csv('movies.csv')
# Build a Surprise reader (library defaults) and wrap the ratings table in a
# Surprise dataset.
reader = Reader()
data = Dataset.load_from_df(ratings_df, reader)

# Split the data into train and test sets, holding out 25% for testing.
trainset, testset = train_test_split(data, test_size=.25)

# Fit SVD with the hyperparameters the original author describes as tuned,
# then generate predictions for the held-out test set.
svd = SVD(n_factors=100, n_epochs=30, lr_all=0.01, reg_all=0.1)
svd.fit(trainset)
svd_preds = svd.test(testset)


# Function to get new users preferences on any movie or a particular genre
def movie_rater(movie_df, num=5, genre=None):
    """ This function is to handle a cold start with a new user.  It takes in a number of ratings
        from a new user and gives the output of 5 movie recommendations.
        
        Args:
            movie_df(dataframe): the dataframe of movies that you will use to recommend movies
            num(integer): the number of ratings you want the user to input before giving a recommendation. The default value is 5.
            genre(string): The genre of movies that you wish to pull from for your user to rate.  The default is None.
        
        Returns:
            The output is a list of 5 movies with their titles and genres receommended for the user based on their initial ratings given.  
예제 #7
0
            'userId': 1000,
            'movieId': rating_movie['movieId'].values[0],
            'rating': float(rating) / 2
        }
        rating_list.append(rating_one_movie)
        n -= 1

import pandas as pd

# Make predictions: add the new user's ratings to the existing table and
# build a full Surprise training set from the result.
reader = Reader()
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way
# to add the collected rating dicts as new rows.
new_ratings = pd.concat([ratings, pd.DataFrame(rating_list)],
                        ignore_index=True)
data = Dataset.load_from_df(new_ratings, reader).build_full_trainset()

# Model
print('\n working.... \n')
svd = SVD(n_factors=100, n_epochs=35, lr_all=0.007, reg_all=0.07)
svd.fit(data)

# Gather and sort recommendations for user 1000 (the new user's id).
# Index 3 of a prediction is the estimated rating. It is doubled here,
# presumably to undo the halving applied when the ratings were collected -
# confirm against the input code.
recommendation_list = [
    (m_id, 2 * svd.predict(1000, m_id)[3]) for m_id in movies['movieId']
]

ranked_recommendations = sorted(recommendation_list,
                                key=lambda x: x[1],
                                reverse=True)

# Deliver Results
print('\n', 'Success!', '\n')
X = int(input('How many movie recommendations would you like to see? '))

i = 0