class SVDModel:
    def __init__(self):
        self.model = SVD()
        self.name = 'Singular Value Decomposition'

    def best_estimator_gridsearchCV(self, data, n_epochs=[5, 10], lr_all=[0.002, 0.005],
                                    reg_all=[0.4, 0.5], cv=3):
        param_grid = {
            'n_epochs': n_epochs,
            'lr_all': lr_all,
            'reg_all': reg_all
        }
        # GridSearchCV expects the algorithm class, not a fitted instance
        gs = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=cv)
        gs.fit(data)
        params = gs.best_params['rmse']
        return params

    def train(self, *args, **kwargs):
        self.model.fit(*args, **kwargs)

    def predict(self, *args, **kwargs):
        return self.model.predict(*args, **kwargs)

    def test(self, *args, **kwargs):
        return self.model.test(*args, **kwargs)
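# --- Hypothetical usage sketch for SVDModel (not part of the original source). ---
# Assumes scikit-surprise is installed; uses the built-in ml-100k dataset so it can
# run standalone. The function name is illustrative only.
def _svdmodel_usage_example():
    from surprise import Dataset
    from surprise.model_selection import train_test_split

    data = Dataset.load_builtin('ml-100k')
    trainset, testset = train_test_split(data, test_size=0.2)

    model = SVDModel()
    # small grid over n_epochs / lr_all / reg_all, scored by RMSE
    best_params = model.best_estimator_gridsearchCV(data)
    print('best RMSE params:', best_params)
    model.train(trainset)
    predictions = model.test(testset)
    print('first prediction:', predictions[0])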
def fill_missing_svd(df_train, df_test):
    df = pd.read_csv(
        '/Users/ronlitman/Ronlitman/University/Statistic/שנה א׳ - סמט׳ א׳/למידה סטטיסטית/Netflix/df_join.csv')
    df = df[df.iid != 100]
    reader = Reader(rating_scale=(1.0, 5.0))
    data = Dataset.load_from_df(df[['uid', 'iid', 'rating']], reader)
    trainset = data.build_full_trainset()
    algo = SVD()
    print('fitting SVD')
    algo.fit(trainset)
    print('filling train set')
    for i in range(df_train.shape[0]):
        for j in range(df_train.shape[1]):
            if df_train.iloc[i, j] == 0:
                df_train.iloc[i, j] = algo.predict(i, j).est
    print('filling test set')
    for i in range(df_test.shape[0]):
        for j in range(df_test.shape[1]):
            if df_test.iloc[i, j] == 0:
                # test-set users are offset by 10000 relative to the training users
                df_test.iloc[i, j] = algo.predict(i + 10000, j).est
    return df_train, df_test
def get_recommended_movies(user_id):
    '''
    Get the top 10 recommended movies for a user based on user similarities.
    Input: user id (integer)
    Output: a list of [title, id] pairs for the 10 recommended movies
    '''
    already_watched = list(RATINGS[RATINGS['userId'] == user_id]['movieId'])
    predicted_est = {}
    id_set = []
    for i in MOVIE_ID_SORT:
        if i not in already_watched:
            # SVD here is assumed to be a fitted model loaded elsewhere in the module
            predicted_est[i] = SVD.predict(user_id, i).est
        else:
            predicted_est[i] = 0
    predicted_est = sorted(predicted_est.items(), key=lambda x: x[1], reverse=True)
    for i in predicted_est:
        if len(id_set) < 10:
            if i[0] in FULL_META['id'].to_list():
                id_set.append(i[0])
    recommendation = FULL_META[FULL_META['id'].isin(id_set)][['title', 'id']].values.tolist()
    return recommendation
def boost(examples, rounds=10):
    l = len(examples)
    distr = normalize([1.] * l)
    hypotheses = [None] * rounds
    alpha = [0] * rounds
    for t in range(rounds):
        # resample a training set according to the weight distribution
        for i in range(l):
            examples.iloc[i] = examples.iloc[draw(distr)]
        # build a surprise trainset from the resampled examples
        reader = Reader()
        data = Dataset.load_from_df(examples, reader)
        trainset = data.build_full_trainset()
        # fit SVD with surprise
        algo = SVD()
        algo.fit(trainset)
        hypotheses[t] = algo
        abserr = [0.0] * l
        for i in range(l):
            abserr[i] = abs(examples.at[i, 'rating'] -
                            algo.predict(examples.at[i, 'user_id'],
                                         examples.at[i, 'business_id']).est)
        # delta: weighted mean absolute error of this round's hypothesis
        delta = sum(x * y for x, y in zip(distr, abserr))
        # mark examples whose error exceeds delta as misclassified (-1)
        hypRes = np.where(np.array(abserr) > delta, -1, 1)
        alpha[t] = 0.5 * math.log((1 - delta) / (.0001 + delta))
        # reweight: misclassified examples gain weight, correct ones lose weight
        distr = normalize([d * math.exp(-alpha[t] * h) for (d, h) in zip(distr, hypRes)])

    def finalHypothesis(x):
        # x is a (user_id, business_id) pair; combine the weighted predictions
        return sign(sum(a * h.predict(x[0], x[1]).est for (a, h) in zip(alpha, hypotheses)))

    return finalHypothesis
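# --- Hypothetical helper sketches for boost() (not part of the original source). ---
# boost() relies on normalize, draw and sign, which are not defined in this snippet;
# these are minimal, assumed implementations.
def normalize(weights):
    # scale the weights so they sum to 1 (a probability distribution)
    total = sum(weights)
    return [w / total for w in weights]

def draw(distr):
    # sample an index according to the probability distribution distr
    import random
    r = random.random()
    cum = 0.0
    for i, p in enumerate(distr):
        cum += p
        if r <= cum:
            return i
    return len(distr) - 1

def sign(x):
    # return +1 for non-negative values, -1 otherwise
    return 1 if x >= 0 else -1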
def personalized_shows(username):
    from surprise.model_selection import cross_validate

    ratings = pd.read_csv(rating_path)
    reader = Reader()
    data = Dataset.load_from_df(ratings[['username', 'show_id', 'rating']], reader)
    # evaluate with 10-fold cross-validation, then fit on the full trainset
    svd = SVD()
    cross_validate(svd, data, measures=['RMSE'], cv=10)
    svd.fit(data.build_full_trainset())
    temp = []
    obj = Show.objects.all()
    for i in obj:
        temp.append([i.show_id, i.show_title, svd.predict(username, i.show_id).est])
    temp.sort(key=lambda x: x[2], reverse=True)
    ans = []
    rated = Show_Rating.objects.filter(username=username)
    already_rated = []
    for i in rated:
        already_rated.append(i.show_id)
    j = 0
    for i in temp:
        if j > 11:
            break
        if i[0] not in already_rated:
            ans.append(i[1])
            j += 1
    final = []
    for i in ans:
        final.append(get_show_details(i))
    return final
def get_collab_recommendation(dataset, userid):
    dataframe = pd.DataFrame(dataset)
    reader = Reader(rating_scale=(1, 5))
    dataset = Dataset.load_from_df(dataframe[['userId', 'itemId', 'rating']], reader)
    trainset = dataset.build_full_trainset()
    algo = SVD()
    algo.fit(trainset)
    user_ratings = np.zeros(trainset.n_items)
    for index, row in dataframe[dataframe['userId'] == userid].iterrows():
        user_ratings[row['itemId']] = row['rating']
    result = []
    for index, x in enumerate(user_ratings):
        if x == 0:
            prediction = algo.predict(userid, index)
            # Append (itemID, pred_rating)
            result.append((index, prediction.est))
    result.sort(key=lambda tup: tup[1], reverse=True)
    result = result[:9]
    result = [i for i, x in result]
    return result
class CollaborativeFiltering:
    def __init__(self, rating_df, user_df, movie_df, movie_sim_matrix=None):
        self._df = movie_df
        self._rating_df = rating_df
        self._movie_sim = movie_sim_matrix
        self._theta_m_u = np.zeros([movie_df.shape[0], user_df.shape[0]], dtype=np.float32)
        # self._x_m_n = np.zeros([df.shape[0], feature_n], dtype=np.float32)
        # build the reverse mappings from raw ids to dataframe indices
        self._movie_indices = pd.Series(movie_df.index, index=movie_df['id'])
        self._user_indices = pd.Series(user_df.index, index=user_df['id'])
        self._algo = SVD()

    def _get_sim_user(self, user_id, top_n=10):
        pass

    def _get_sim_movie(self, movie_id, top_n=10):
        pass

    def calculate(self):
        # for item in self._rating_df.to_numpy():
        #     self._theta_m_u[self._movie_indices[int(item[0])], self._user_indices[int(item[1])]] = item[2]
        # cosine_similarity(self._theta_m_u.T, self._theta_m_u.T)
        # cosine_similarity(self._theta_m_u, self._theta_m_u)
        reader = Reader()
        data = Dataset.load_from_df(
            self._rating_df[['user_id', 'movie_id', 'score']], reader)
        trainset = data.build_full_trainset()
        self._algo.fit(trainset)

    def get_results(self, user_id, movie_id):
        return self._algo.predict(user_id, movie_id).est
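# --- Hypothetical usage sketch for CollaborativeFiltering (not part of the original source). ---
# Assumes rating_df has columns ['user_id', 'movie_id', 'score'] and that user_df and
# movie_df both carry an 'id' column, as the constructor expects.
def _collaborative_filtering_usage_example(rating_df, user_df, movie_df):
    cf = CollaborativeFiltering(rating_df, user_df, movie_df)
    cf.calculate()                                   # fits the internal SVD model on the ratings
    est = cf.get_results(user_id=1, movie_id=302)    # predicted score for one (user, movie) pair
    print('predicted score:', est)
    return est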
def svd(data, kwargs):
    # Set up the algorithm; hyperparameters come in through kwargs
    n_factors = kwargs.get('k_features')
    n_epochs = kwargs.get('maxiter')
    lr_pu = kwargs.get('lr_pu')
    lr_qi = kwargs.get('lr_qi')
    reg_bu = kwargs.get('reg_bu')
    reg_qi = kwargs.get('reg_qi')
    # pass the hyperparameters by keyword so they reach the intended SVD arguments
    algo = SVD(n_factors=n_factors[0],
               n_epochs=n_epochs,
               lr_pu=lr_pu[0],
               lr_qi=lr_qi[0],
               reg_bu=reg_bu[0],
               reg_qi=reg_qi[0],
               random_state=kwargs['random_seed'])
    # Train the algorithm on the data (a surprise trainset), and predict ratings
    algo.fit(data)
    # Predict the full 10000 x 1000 rating matrix
    prediction = np.zeros([10000, 1000])
    for row in range(10000):
        for col in range(1000):
            prediction[row, col] = algo.predict(str(row + 1), str(col + 1)).est
    return prediction
def generate_recommendations():
    file_path = os.path.expanduser('./data/reviews_stars.csv')
    reader = Reader(line_format='user item rating', sep=',')
    data = Dataset.load_from_file(file_path, reader=reader)
    trainset = data.build_full_trainset()
    algo = SVD(n_factors=5, n_epochs=25, lr_all=0.006, reg_all=0.2, biased=True)
    algo.fit(trainset)
    df = pd.read_csv('./data/reviews_stars.csv', header=None,
                     names=['user', 'business', 'review'])
    businesses = df['business'].unique()
    users = df.groupby('user')['business'].nunique()
    with open('./data/collaborative_recomendations.csv', 'w') as file:
        for user, user_count in users.items():
            for business in businesses:
                # only recommend for users with more than 12 reviewed businesses
                if user_count > 12:
                    pred = algo.predict(user, business)
                    if pred.est > 4.2:
                        file.write(user + ',' + business + ',' + str(pred.est) + '\n')
            print(user)
def predict_movie(user_id, movie_list, df):
    # view historical preference of the user
    temp_usr = df[(df['Cust_Id'] == user_id) & (df['Rating'] == 5)]
    temp_usr = temp_usr.set_index('Movie_Id')
    temp_usr = temp_usr.join(df_title)['Name']
    print("Movies Previously liked by user.....................")
    print(temp_usr[:10])
    # create svd model to predict movies for user
    user = movie_list.copy()
    user = user.reset_index()
    user = user[~user['Movie_Id'].isin(drop_movie_list)]
    # getting dataset
    reader = Reader()
    data = Dataset.load_from_df(
        df[['Cust_Id', 'Movie_Id', 'Rating']][:1000000], reader)
    trainset = data.build_full_trainset()
    svd = SVD()
    svd.fit(trainset)
    user['Estimate_Score'] = user['Movie_Id'].apply(
        lambda x: svd.predict(user_id, x).est)
    user = user.drop('Movie_Id', axis=1)
    user = user.sort_values('Estimate_Score', ascending=False)
    print("Recommended Movies for User are as follows.........\n")
    print(user.head(10))
def train_test(df):
    # ---------------------------- train ----------------------------
    reader = Reader()
    svd = SVD()
    border_line = int(df.shape[0] / 5)
    data = Dataset.load_from_df(
        df[['Cust_Id', 'Movie_Id', 'Rating']][:-border_line], reader)
    # train 1: cross_validate
    # model_selection.cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)
    # train 2: fit on the full trainset (no cross-validation)
    trainset = data.build_full_trainset()
    svd.fit(trainset)
    # ---------------------------- test ----------------------------
    test_df = df.iloc[-border_line:]
    print('test_df Shape: {}'.format(test_df.shape))
    # assumes the raw df columns are ordered [Cust_Id, Rating, Movie_Id]
    data_matrix = np.array(test_df, dtype=int)
    Estimate_Score = []
    for user in data_matrix:
        Score = svd.predict(user[0], user[2]).est
        Estimate_Score.append(Score)
    loss = RMSE(Estimate_Score, data_matrix[:, 1])
    return loss
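# --- Hypothetical RMSE helper for train_test() (not part of the original source). ---
# train_test() calls RMSE(predictions, truth), which is not defined in this snippet;
# a minimal implementation assuming two equal-length sequences of ratings:
def RMSE(predictions, truth):
    predictions = np.asarray(predictions, dtype=float)
    truth = np.asarray(truth, dtype=float)
    # root mean squared error between predicted and actual ratings
    return np.sqrt(np.mean((predictions - truth) ** 2))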
def get_new_user_recommend(ratings, movies, USER):
    movies['genres'] = movies['genres'].fillna('[]').apply(literal_eval).apply(
        lambda x: [i['name'] for i in x] if isinstance(x, list) else [])
    movies['year'] = pd.to_datetime(
        movies['release_date'], errors='coerce').apply(
            lambda x: str(x).split('-')[0] if pd.notnull(x) else np.nan)
    movies.drop(movies.columns.difference(
        ['movieId', 'title', 'genres', 'year']), axis=1, inplace=True)
    movies.set_index('movieId', inplace=True)
    reader = Reader(rating_scale=(0.5, 5))
    data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
    svd = SVD()  # (n_factors=160, n_epochs=100, lr_all=0.005, reg_all=0.1) 0.86?
    # cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=5)
    user_ratings = ratings[ratings['userId'] == USER]
    user_ratings = user_ratings.set_index('movieId')
    user_ratings = user_ratings.join(movies)
    user_ratings.drop(user_ratings.columns.difference(
        ['movieId', 'title', 'genres', 'year']), axis=1, inplace=True)
    movies_cut = movies[~movies.isin(user_ratings)].dropna()
    trainset = data.build_full_trainset()
    svd.fit(trainset)
    file_name = os.path.abspath('app/static/dump_file')
    dump.dump(file_name, algo=svd)
    user_predict = movies_cut.copy()
    user_predict = user_predict.reset_index()
    user_predict['Estimate_Score'] = user_predict['movieId'].apply(
        lambda x: svd.predict(USER, x).est)
    user_predict = user_predict.sort_values('Estimate_Score', ascending=False)
    return user_predict
def surpriseTesting():
    """scikit-surprise library testing"""
    from surprise.model_selection import cross_validate

    # Load the movielens-100k dataset (download it if needed);
    # it is evaluated below with 3-fold cross-validation.
    data = surprise.Dataset.load_builtin('ml-100k')
    # reader = surprise.Reader(line_format='user item rating', sep=',')
    # data = Dataset.load_from_file('temp.csv', reader=reader)
    trainSet = data.build_full_trainset()
    for rating in trainSet.all_ratings():
        print(rating)
    print(trainSet.n_items)
    algo = SVD()
    # algo = KNNBasic()
    algo.fit(trainSet)
    # Evaluate performances of our algorithm on the dataset.
    cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)
    uid = str(196)  # raw user id (as in the ratings file). They are **strings**!
    iid = str(242)  # raw item id (as in the ratings file). They are **strings**!
    # get a prediction for specific users and items.
    pred = algo.predict(uid, iid, r_ui=-1, verbose=True)
    print(pred.est)
def PMFB(self):
    u_id = []
    I_id = []
    r_ui_ = np.array([])
    _est = np.array([])
    # probabilistic matrix factorization with biases (SVD with biased=True)
    algo = SVD(n_factors=100, n_epochs=20, biased=True, lr_all=0.005, reg_all=0.02)
    algo.fit(self.trainset)
    for uid in self.list:
        lids = self.data[self.data.uid == uid]
        a = self.data[self.data.uid == uid]
        for i in range(1, len(a)):
            lid = lids[i - 1:i].lid.values[0]
            r_ui = lids[i - 1:i].rate.values[0]
            pred = algo.predict(uid, lid, r_ui, verbose=True)
            u_id.append(int(pred.uid))
            I_id.append(int(pred.iid))
            r_ui_ = np.append(r_ui_, pred.r_ui)
            _est = np.append(_est, pred.est)
    self.df_est = pd.DataFrame({
        'uid': u_id,
        'Iid': I_id,
        'r_ui': r_ui_,
        'est': _est
    })
    self.arr = self.df_est['uid'].unique()
    self.PMFWB_ndcg_ = self.Calculate_NDCG()
def do(user_i, df):
    global item_base
    item_base = set(df["장소"])
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(df=df, reader=reader)
    train = data.build_full_trainset()
    test = train.build_testset()
    model = SVD(n_factors=100, n_epochs=20)
    model.fit(train)
    L = []
    actual_rating = 0
    for item_id in item_base:
        predictions = model.predict(user_i, item_id, actual_rating)
        # predictions[3] is the estimated rating (Prediction.est)
        if predictions[3] > 3:
            L.append(predictions)
    result = pd.DataFrame(L)
    result.sort_values(["est"], ascending=False, inplace=True)
    result = result.loc[:, ["uid", "iid", "est"]]
    # the raw item id encodes "name*address"; split it into separate columns
    result["주소"] = result["iid"].map(lambda x: x.split('*')[1])
    result["iid"] = result["iid"].map(lambda x: x.split('*')[0])
    result.reset_index(drop=True, inplace=True)
    result.columns = ["name", "place", "rating", "region"]
    return result[:100]

# def do(user_i, df):
#     global item_base
#     item_base = set(df["장소"])
#     return svd_recommend(user_i)
def model(self, alg_key):
    from surprise.model_selection import cross_validate

    reader = Reader(rating_scale=(1, 5))
    data_result = Dataset.load_from_df(
        self.make_df()[['user_id', 'place_id', 'score']], reader)
    # pick the algorithm
    if alg_key.lower() == "svd":
        alg = SVD()
    elif alg_key.lower() == "knn":
        alg = KNNBasic()
    elif alg_key.lower() == "nmf":
        alg = NMF()
    # evaluation with 10-fold cross-validation
    cross_validate(alg, data_result, measures=['RMSE', 'MAE'], cv=10)
    # prediction
    # user_0 smallShop_5645 2
    test_user = '******'
    test_id = 'smallShop_7089'
    real_score = 4
    trainset = data_result.build_full_trainset()
    alg.fit(trainset)
    print(alg.predict(test_user, test_id, real_score))
def craete_personal_value(self):
    # Reader from the surprise library
    reader = Reader()
    path = os.path.abspath('')
    fname = '\com_dayoung_api\cop\mov\model\data\\ratings_small.csv'
    ratings = pd.read_csv(path + fname, encoding='utf-8')
    data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
    trainset = data.build_full_trainset()
    testset = trainset.build_testset()
    svd = SVD()
    svd.fit(trainset)
    predictions = svd.test(testset)
    accuracy.rmse(predictions)
    ratings[ratings['userId'] == 1]
    svd.predict(1, 302, 3)
    return svd
def hybrid_recommendation(title: str, userId: int, df: DataFrame, cosine_sim: np.ndarray,
                          svd: SVD, links: pd.DataFrame) -> pd.DataFrame:
    movies = get_popular_recomandation(title, df, cosine_sim)
    # map each TMDB id to its MovieLens movieId via the links table
    id_map = links.set_index('tmdbId')['movieId']
    movies['id'] = movies['id'].map(id_map)
    # predict an estimated rating for each candidate movie for this user
    movies['est'] = movies['id'].apply(lambda x: svd.predict(userId, x).est)
    movies = movies.sort_values('est', ascending=False)
    return movies
def recomendacion(usuario):
    array = []
    for rate in Calificacion.objects.all():
        array.append([rate.usuario_id, rate.asignatura_id, rate.calificacion])
    df = pd.DataFrame(data=array)
    reader = Reader(rating_scale=(0, 10))
    data = Dataset.load_from_df(df, reader)
    trainingSet = data.build_full_trainset()
    param_grid = {
        'n_factors': [50, 100, 150],
        "n_epochs": [40, 50, 60],
        "lr_all": [0.002, 0.005],
        "reg_all": [0.4, 0.6]
    }
    gs = GridSearchCV(SVD, param_grid, measures=["rmse", "mae"], cv=3)
    gs.fit(data)
    # Optimal parameters
    params = gs.best_params["rmse"]
    SVDoptimized = SVD(n_factors=params['n_factors'],
                       n_epochs=params['n_epochs'],
                       lr_all=params['lr_all'],
                       reg_all=params['reg_all'])
    SVDoptimized.fit(trainingSet)
    asig = Asignatura.objects.all()
    asig_user = Calificacion.objects.all().filter(usuario_id=usuario.id)
    # Subjects the user has not rated yet
    asignaturas_SinC = []
    for asignatura in asig:
        encontrado = False
        for asignatura_usuario in asig_user:
            if asignatura_usuario.asignatura_id == asignatura.codigo:
                encontrado = True
        if not encontrado:
            asignaturas_SinC.append(asignatura)
    # Recommended subjects
    asignaturas_rec = []
    for asignatura in asignaturas_SinC:
        asignaturas_rec.append({
            'asignatura': asignatura,
            'svd': SVDoptimized.predict(usuario.id, asignatura.codigo).est
        })

    # Sort key: the predicted SVD score
    def ordenador(e):
        return e['svd']

    asignaturas_rec.sort(reverse=True, key=ordenador)
    return asignaturas_rec
class SVDCollaborativeFiltering:
    # Based on the Singular Value Decomposition (SVD) implementation built into the surprise library.
    # Uses a matrix factorization method to reduce a matrix into lower-dimensional parts, simplifying the calculations.
    def __init__(self, ratings):
        # The surprise library does not accept raw data frames as training and test sets,
        # so the ratings are wrapped in a Dataset first.
        reader = Reader(rating_scale=(1, 5))
        data = Dataset.load_from_df(ratings[['user_id', 'book_id', 'rating']], reader)
        self.train, self.test = train_test_split(data, test_size=.20)
        self.model = SVD()

    def test_model(self):
        # Checks the predicted values against the test set.
        # Returns Mean Absolute Error (MAE) and Root Mean Square Error (RMSE).
        predictions = self.model.test(self.test)
        return accuracy.mae(predictions, verbose=False), accuracy.rmse(predictions, verbose=False)

    def train_model(self):
        # Trains the model on the training set (80% of the total ratings data)
        self.model.fit(self.train)

    def predict(self, user_id, books, ratings, already_read=None):
        # Predicts recommended books for a given user
        # Gets all unread books
        if already_read is None:
            already_read = ratings[ratings['user_id'] == user_id]['book_id'].unique()
        prediction = books[[
            'book_id', 'title', 'authors', 'average_rating', 'image_url'
        ]].copy()
        prediction = prediction[~prediction['book_id'].isin(already_read)]
        # Predicts a rating for each book and sorts them
        prediction['predict'] = prediction['book_id'].apply(
            lambda x: self.model.predict(user_id, x).est)
        prediction = prediction.sort_values('predict', ascending=False)
        return convert(prediction)

    def save(self, location):
        # Fully saves the model
        pickle.dump(self, open(location, 'wb'))

    @staticmethod
    def load(location):
        # Loads the model
        infile = open(location, 'rb')
        obj = pickle.load(infile)
        infile.close()
        return obj
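# --- Hypothetical usage sketch for SVDCollaborativeFiltering (not part of the original source). ---
# Assumes: a ratings DataFrame with ['user_id', 'book_id', 'rating'] on a 1-5 scale,
# a books DataFrame with the columns used in predict(), and that convert() is defined elsewhere.
def _svd_cf_usage_example(ratings, books):
    cf = SVDCollaborativeFiltering(ratings)
    cf.train_model()                       # fit SVD on the 80% training split
    mae, rmse = cf.test_model()            # evaluate on the 20% hold-out split
    print(f'MAE={mae:.3f}, RMSE={rmse:.3f}')
    recs = cf.predict(user_id=1, books=books, ratings=ratings)
    cf.save('svd_cf.pkl')                  # persist the whole object with pickle
    restored = SVDCollaborativeFiltering.load('svd_cf.pkl')
    return recs, restored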
def top_recommended_movies_for_user(userId: int, df: DataFrame, svd: SVD, links: DataFrame):
    movies = df.copy()
    # map each TMDB id to its MovieLens movieId via the links table
    id_map = links.set_index('tmdbId')['movieId']
    movies['id'] = movies['id'].map(id_map)
    # predict an estimated rating for every movie for this user
    movies['est'] = movies['id'].apply(lambda x: svd.predict(userId, x).est)
    movies = movies.sort_values('est', ascending=False)
    return movies
class SVD_:
    def __init__(self, data, rating_scale, n_epochs=50, lr_all=.005, reg_all=.02):
        self.data = data
        self.rating_scale = rating_scale
        self.reader = Reader(rating_scale=self.rating_scale)
        self.model_data = Dataset.load_from_df(
            data.loc[:, ["userId", "movieId", "rating"]], self.reader)
        self.trainset = self.model_data.build_full_trainset()
        self.model = SVD(n_epochs=n_epochs, lr_all=lr_all, reg_all=reg_all)
        print('fitting SVD model...')
        self.model.fit(self.trainset)
        self.grid_search_ = None

    def set_model_params(self, model_params):
        print('updating model parameters...')
        # model_params is a dict of keyword arguments for SVD
        self.model = SVD(**model_params)
        print('fitting SVD model...')
        self.model.fit(self.trainset)
        return self.model

    def update_grid_search(self, gs):
        self.grid_search_ = gs

    def fit(self, data):
        self.data = data
        self.model_data = Dataset.load_from_df(
            data.loc[:, ["userId", "movieId", "rating"]], self.reader)
        self.trainset = self.model_data.build_full_trainset()
        self.model.fit(self.trainset)

    def grid_search(self, grid_params):
        print('grid search...')
        gs = GridSearchCV(SVD, grid_params, measures=["rmse", "mae"], cv=3)
        gs.fit(self.model_data)
        best_params, best_score = gs.best_params["rmse"], gs.best_score["rmse"]
        print(f'Best score (RMSE): {best_score}')
        print(f'Best params (RMSE): {best_params}')
        print(f'Best score (MAE): {gs.best_score["mae"]}')
        print(f'Best params (MAE): {gs.best_params["mae"]}')
        self.set_model_params(best_params)
        return best_params

    def predict(self, test_data):
        ratings = test_data.apply(
            lambda x: self.model.predict(x['userId'], x['movieId']).est, axis=1)
        return ratings
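# --- Hypothetical usage sketch for SVD_ (not part of the original source). ---
# Assumes a ratings DataFrame with ['userId', 'movieId', 'rating'] columns on a 0.5-5 scale.
def _svd_wrapper_usage_example(ratings_df):
    model = SVD_(ratings_df, rating_scale=(0.5, 5))
    # optional: small grid search, which also refits the model with the best RMSE parameters
    best_params = model.grid_search({'n_epochs': [20, 50], 'lr_all': [0.002, 0.005]})
    print('chosen params:', best_params)
    # predict estimated ratings for a few (userId, movieId) pairs
    preds = model.predict(ratings_df[['userId', 'movieId']].head())
    print(preds)
    return preds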
def getSVDReco(username):
    my_recs = []
    items = getItemsReco(username)
    for iid in items:
        # SVD here is assumed to be a fitted model loaded elsewhere in the module
        my_recs.append((iid, SVD.predict(uid=username, iid=iid).est))
    Result = pd.DataFrame(my_recs,
                          columns=['product_id', 'predictions']
                          ).sort_values('predictions', ascending=False).head(10)
    # Final_Result = TranslateReco(Result, state, city, cat)
    return Result
def hybrid_rec(userid, favemovie, n):
    '''Takes a userid, a favourite movie and a number of recommendations n,
    and returns the recommendations sorted by predicted score.'''
    rec_hybrid = content_recommendations(favemovie, n)
    svd = SVD(n_factors=50, reg_all=0.05, random_state=150)
    trainset = data.build_full_trainset()
    svd.fit(trainset)
    for index, row in rec_hybrid.iterrows():
        pred = svd.predict(userid, index)
        rec_hybrid.at[index, 'score'] = pred.est
    rec_hybrid = rec_hybrid.sort_values('score', ascending=False)
    return rec_hybrid
def SDV_algo(id):
    movies_to_predict = personalise_movie_list_for_user(id)
    algo1 = SVD()
    algo1.fit(data.build_full_trainset())
    my_recs = []
    for iid in movies_to_predict:
        # note: the user id passed to predict is hard-coded to 1001 here
        my_recs.append((iid, algo1.predict(uid=1001, iid=iid).est))
    print(
        pd.DataFrame(my_recs, columns=['iid', 'predictions']
                     ).sort_values('predictions', ascending=False).head(10))
def factorisation(self, n_user, n_item):
    # returns the complete rating matrix with n_user rows and n_item columns
    reader = Reader()
    data = Dataset.load_from_df(self.data, reader)
    algo = surprise.SVD(n_factors=10, n_epochs=10, lr_all=.01, reg_all=.01)
    results = surprise.model_selection.validation.cross_validate(
        algo, data, measures=['MSE'], cv=3, verbose=True)
    # now fill in the matrix
    print("estimated waiting time:", round(n_user * n_item / 105000), "seconds.")
    M = []
    for u in range(n_user):
        M.append([algo.predict(u, i).est for i in range(n_item)])
    return np.array(M)
def recomendar_colaborativo(usuario):
    array = []
    for rate in Calificacion.objects.all():
        array.append([rate.usuario_id, rate.producto_id, rate.calificacion])
    df = pd.DataFrame(data=array)
    reader = Reader(rating_scale=(0, 10))
    data = Dataset.load_from_df(df, reader)
    trainingSet = data.build_full_trainset()
    param_grid = {
        'n_factors': [50, 100, 150],
        "n_epochs": [40, 50, 60],
        "lr_all": [0.002, 0.005],
        "reg_all": [0.4, 0.6]
    }
    gs = GridSearchCV(SVD, param_grid, measures=["rmse", "mae"], cv=3)
    gs.fit(data)
    # Optimal parameters
    params = gs.best_params["rmse"]
    SVDoptimized = SVD(n_factors=params['n_factors'],
                       n_epochs=params['n_epochs'],
                       lr_all=params['lr_all'],
                       reg_all=params['reg_all'])
    SVDoptimized.fit(trainingSet)
    prod = Producto.objects.all()
    prod_user = Calificacion.objects.all().filter(usuario_id=usuario.id)
    # Products the user has not rated yet
    productos_SinC = []
    for producto in prod:
        encontrado = False
        for producto_usuario in prod_user:
            if producto_usuario.producto_id == producto.idProducto:
                encontrado = True
        if not encontrado:
            productos_SinC.append(producto)
    # Recommended products
    productos_rec = []
    for producto in productos_SinC:
        productos_rec.append({
            'producto': producto,
            'svd': SVDoptimized.predict(usuario.id, producto.idProducto).est
        })

    # Sort key: the predicted SVD score
    def ordenador(e):
        return e['svd']

    productos_rec.sort(reverse=True, key=ordenador)
    return productos_rec
def get_pre_rating(self, movie_id_not_rated, user_id):
    df = self.base_data
    df = Dataset.load_from_df(df[['userId', 'movieId', 'rating']], self.reader)
    trainset = df.build_full_trainset()
    # Build an algorithm, and train it.
    algo = SVD(n_factors=160, n_epochs=100, lr_all=0.005, reg_all=0.1)
    algo.fit(trainset)
    rating_pred = {}
    for movie_id in movie_id_not_rated:
        pred = algo.predict(user_id, movie_id)
        rating_pred[movie_id] = pred.est
    return rating_pred
def rating_predict():
    df = pd.read_csv('data/fit1.csv')
    train = df[0: df.shape[0] // 10 * 9]
    test = df[df.shape[0] // 10 * 9:]
    # SVD
    col_names = ['user_id', 'item_id', 'rating']
    reader = Reader(rating_scale=(2, 10))
    data = Dataset.load_from_df(train[col_names], reader)
    trainset = data.build_full_trainset()
    algo = SVD()
    algo.fit(trainset)
    # add the SVD estimate for every training pair as a new feature column
    svds = []
    for user_id, book_id in zip(train['user_id'], train['item_id']):
        svds.append(algo.predict(user_id, book_id).est)
    train['SVD'] = svds
def new_recommendations(df, new_ratings):
    df = df[['user_id', 'isbn', 'rating']]
    new_ratings = pd.DataFrame(new_ratings)[['user_id', 'isbn', 'rating']]
    new_df = pd.concat([df, new_ratings]).reset_index(drop=True)
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(new_df, reader)
    train, test = train_test_split(data, test_size=.2)
    model = SVD(n_epochs=17, lr_all=.015, reg_all=.125, n_factors=17)
    model.fit(train)
    preds = model.test(test)
    user_id = new_ratings.user_id[0]
    book_list = []
    for x in new_df.isbn.unique():
        # Prediction index 3 is the estimated rating (.est)
        book_list.append((x, model.predict(user_id, x)[3]))
    ranked_books = sorted(book_list, key=lambda x: x[1], reverse=True)
    return ranked_books