def run_model(personality):
    """Fit an SVD model on the stored ratings plus one new user and rank predictions.

    The rows returned by ``json_to_pandas()`` are combined with the rows that
    ``user_to_dfrows(len(df.index), personality)`` builds for the new user.
    An SVD model is trained on the ``user`` / ``trait`` / ``percentile``
    columns (ratings scaled to 0.0-1.0) and then asked to predict every
    (user, trait) pair that is absent from the training data.

    Args:
        personality: Passed straight through to ``user_to_dfrows`` to build
            the new user's rows; its expected structure is defined there.

    Returns:
        A list of at most three ``(estimated_rating, trait)`` tuples, highest
        estimate first.
    """
    import heapq

    reader = Reader(sep=',', skip_lines=0, rating_scale=(0.0, 1.0))
    df = pd.DataFrame(json_to_pandas())
    new = pd.DataFrame(user_to_dfrows(len(df.index), personality))
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    df = pd.concat([df, new])
    data = Dataset.load_from_df(df[['user', 'trait', 'percentile']], reader=reader)
    trainset = data.build_full_trainset()

    # A neighbourhood model such as KNNWithMeans(k=40, sim_options={'name':
    # 'pearson_baseline', 'user_based': False}) could be swapped in here;
    # 'user_based' switches between user-based and item-based filtering.
    algo = SVD()
    algo.fit(trainset)

    # The anti-testset holds every known (user, item) pair with no rating.
    # NOTE(review): this ranks predictions across all users, not only the
    # newly added one -- confirm that is the intended behaviour.
    testset = trainset.build_anti_testset()
    predictions = algo.test(testset)

    # nlargest does its own selection, so no explicit heapify is needed.
    return heapq.nlargest(3, ((p.est, p.iid) for p in predictions))
import os

import numpy as np
import pandas as pd
from surprise import Dataset, Reader, accuracy
from surprise.model_selection import KFold, train_test_split
from surprise.prediction_algorithms import SVD, SVDpp

# --- Experiment 1: load the ratings directly from the CSV file --------------
# The first line of the file is skipped (skip_lines=1).
reader = Reader(line_format='user item rating', sep=',', skip_lines=1)
data = Dataset.load_from_file("./ratings.csv", reader=reader)

# Fixed random_state so the 80/20 split is reproducible between runs.
x_train, x_test = train_test_split(data, test_size=0.2, random_state=10000)

svd = SVD(biased=False)
svd.fit(x_train)
prediction = svd.test(x_test)
accuracy.rmse(predictions=prediction)
# Output recorded from an earlier run:
# RMSE: 0.8548
# 0.8547798833361556

# --- Experiment 2: load the same file through pandas ------------------------
datas = pd.read_csv(
    "ratings.csv",
    delimiter=",",
    skiprows=1,
    names=["user", "item", "rating"],
    usecols=[0, 1, 2],
)
datas["user"] = datas["user"].astype(np.int32)
datas["item"] = datas["item"].astype(np.int32)
# NOTE(review): casting to int32 truncates any fractional ratings (e.g. 3.5
# becomes 3) -- confirm the file only contains whole-number ratings.
datas["rating"] = datas["rating"].astype(np.int32)
print(datas.dtypes)

reader = Reader(line_format='user item rating')
data = Dataset.load_from_df(datas, reader)
from surprise import accuracy

# Source data: popular movies, per-user ratings, and the movie catalogue.
popular_movies_df = pd.read_csv('popular_movies.csv')
ratings_df = pd.read_csv(
    'ratings_limited_users.csv',
    usecols=['userId', 'movieId', 'rating'],
)
movies_df = pd.read_csv('movies.csv')

# Wrap the ratings in a Surprise dataset.
# NOTE(review): load_from_df expects the columns in user, item, rating order;
# this assumes the CSV stores them that way -- confirm.
# NOTE(review): Reader() uses Surprise's default rating scale -- confirm it
# matches the range of ratings in the file.
reader = Reader()
data = Dataset.load_from_df(ratings_df, reader)

# Hold out a quarter of the ratings for testing.
trainset, testset = train_test_split(data, test_size=0.25)

# Fit SVD with the previously tuned hyper-parameters and predict the test set.
svd = SVD(n_factors=100, n_epochs=30, lr_all=0.01, reg_all=0.1)
svd.fit(trainset)
svd_preds = svd.test(testset)


# Function to get new users preferences on any movie or a particular genre
def movie_rater(movie_df, num=5, genre=None):
    """
    This function is to handle a cold start with a new user. It takes in a
    number of ratings from a new user and gives the output of 5 movie
    recommendations.

    Args:
        movie_df(dataframe): the dataframe of movies that you will use to
            recommend movies
        num(integer): the number of ratings you want the user to input before
            giving a recommendation. The default value is 5.
        genre(string): The genre of movies that you wish to pull from for your
            user to rate. The default is None.

    Returns:
        The output is a list of 5 movies with their titles and genres
        recommended for the user based on their initial ratings given. A
        collaborative filter is used to add their ratings to the initial
        dataframe to then find this output."""