Ejemplo n.º 1
0
def run_model(personality):
    """Fit an SVD model on stored and new-user data and return the top 3 predictions.

    The rows produced by ``json_to_pandas()`` are combined with the rows that
    ``user_to_dfrows()`` builds for ``personality``; an SVD model is trained
    on the full set and every (user, trait) pair that is absent from the
    training data is then predicted.

    Args:
        personality: Passed through unchanged to ``user_to_dfrows`` together
            with the number of already-stored rows.

    Returns:
        A list of up to three ``(estimate, trait)`` tuples, ordered from the
        highest estimate to the lowest.
    """
    import heapq

    # Ratings are expected on a 0.0-1.0 scale (see rating_scale).
    reader = Reader(sep=',', skip_lines=0, rating_scale=(0.0, 1.0))
    df = pd.DataFrame(json_to_pandas())

    new = pd.DataFrame(user_to_dfrows(len(df.index), personality))
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
    # call and also works on older pandas versions.
    df = pd.concat([df, new])
    data = Dataset.load_from_df(df[['user', 'trait', 'percentile']],
                                reader=reader)

    trainset = data.build_full_trainset()
    # SVD is the model in use; a KNNWithMeans(k=40, sim_options={'name':
    # 'pearson_baseline', 'user_based': False}) variant was tried previously.
    algo = SVD()
    algo.fit(trainset)

    # The anti-testset holds every (user, item) pair missing from the trainset.
    # NOTE(review): this covers all users, not only the newly added one, so
    # the top 3 is taken across every user - confirm this is intended.
    testset = trainset.build_anti_testset()
    predictions = algo.test(testset)

    # Prediction fields: index 3 is the estimated rating, index 1 the item id.
    # nlargest accepts any iterable, so no explicit heapify is required.
    return heapq.nlargest(
        3, ((prediction[3], prediction[1]) for prediction in predictions)
    )
Ejemplo n.º 2
0
import os

import numpy as np
import pandas as pd
# Dataset was used below without being imported, which raised NameError.
from surprise import Dataset, Reader, accuracy
from surprise.model_selection import KFold, train_test_split
from surprise.prediction_algorithms import SVD, SVDpp

# --- Variant 1: let Surprise parse the CSV file directly -------------------
# The first line of the file is skipped (header row).
reader = Reader(line_format='user item rating', sep=',', skip_lines=1)
data = Dataset.load_from_file("./ratings.csv", reader=reader)

# Hold out 20% of the ratings for evaluation; the seed makes the split
# reproducible.
x_train, x_test = train_test_split(data, test_size=0.2, random_state=10000)
svd = SVD(biased=False)
svd.fit(x_train)

prediction = svd.test(x_test)
accuracy.rmse(predictions=prediction)
# Output recorded from a previous run:
# RMSE: 0.8548
# 0.8547798833361556

# --- Variant 2: load the same file through pandas --------------------------
# Only the first three columns are read and the header row is skipped.
datas = pd.read_csv(
    "ratings.csv",
    delimiter=",",
    skiprows=1,
    names=["user", "item", "rating"],
    usecols=[0, 1, 2],
)
datas["user"] = datas["user"].astype(np.int32)
datas["item"] = datas["item"].astype(np.int32)
# NOTE(review): casting ratings to int32 truncates any fractional ratings
# (e.g. 3.5 -> 3) - confirm the file only contains whole-number ratings.
datas["rating"] = datas["rating"].astype(np.int32)
print(datas.dtypes)

reader = Reader(line_format='user item rating')
data = Dataset.load_from_df(datas, reader)
Ejemplo n.º 3
0
from surprise import accuracy

# Input tables: popular movies, the (user, movie, rating) triples and the
# movie catalogue.
popular_movies_df = pd.read_csv('popular_movies.csv')
ratings_df = pd.read_csv(
    'ratings_limited_users.csv',
    usecols=['userId', 'movieId', 'rating'],
)
movies_df = pd.read_csv('movies.csv')

# Wrap the ratings frame in a Surprise dataset using a default Reader.
reader = Reader()
data = Dataset.load_from_df(ratings_df, reader)

# Keep a quarter of the ratings aside as the test set.
trainset, testset = train_test_split(data, test_size=0.25)

# Train SVD with the previously tuned hyper-parameters, then predict the
# held-out ratings.
svd = SVD(
    n_factors=100,
    n_epochs=30,
    lr_all=0.01,
    reg_all=0.1,
)
svd.fit(trainset)
svd_preds = svd.test(testset)


# Function to collect a new user's preferences on any movie or on a particular genre
def movie_rater(movie_df, num=5, genre=None):
    """ This function is to handle a cold start with a new user.  It takes in a number of ratings
        from a new user and gives the output of 5 movie recommendations.
        
        Args:
            movie_df(dataframe): the dataframe of movies that you will use to recommend movies
            num(integer): the number of ratings you want the user to input before giving a recommendation. The default value is 5.
            genre(string): The genre of movies that you wish to pull from for your user to rate.  The default is None.
        
        Returns:
            The output is a list of 5 movies with their titles and genres receommended for the user based on their initial ratings given.  
            A collaborative filter is used to add their ratings to the inital dataframe to then find this output."""