def compare_models(list_of_models, metric):
    """Compare trained recommender models on the held-out test set.

    Parameters
    ----------
    list_of_models : list
        Trained recommender model objects to compare.  (The original
        docstring said "model names", but ``graphlab.show_comparison``
        below requires the model objects themselves.)
    metric : str
        Either 'rmse' or 'precision_recall'.

    Returns
    -------
    The GraphLab comparison view produced by ``graphlab.show_comparison``.

    Notes
    -----
    Relies on a module-level ``test_data`` SFrame defined elsewhere in
    the file.
    """
    # FIX(review): the previous version (a) used a Python 2 print statement,
    # (b) printed rmse_results['precision_recall_by_user'].groupby(...) where
    # ``rmse_results`` was never defined in this scope (NameError), and
    # (c) called compare_models() without its required ``models`` positional
    # argument.  The dead aggregation/print code was removed and the models
    # are now passed positionally.
    comparison_struct = graphlab.recommender.util.compare_models(
        test_data, list_of_models, metric=metric)
    return graphlab.show_comparison(comparison_struct, list_of_models)
# Train two item-item collaborative-filtering recommenders on wine scores.
m1 = gl.item_similarity_recommender.create(training_data, user_id='user_id', item_id='wine_name', target='score')
# Same model, but each item keeps only its single nearest neighbour.
m2 = gl.item_similarity_recommender.create(training_data, user_id='user_id', item_id='wine_name', target='score',only_top_k=1)
#Load and compare multiple models:
# Previously trained models persisted to disk (presumably via model.save()).
high_filter = gl.load_model('../models/high_filter')
onezero = gl.load_model('../models/onezero')
baseline = gl.load_model('../models/baseline')
gridsearch = gl.load_model('../models/gridsearch')
# model_comp = gl.recommender.util.compare_models(test_data, [m1, m2, baseline, gridsearch, high_filter, onezero], model_names=["m1", "m2", "baseline", "gridsearch", "high_filter", "onezero"], metric='rmse')
# #model_comp = gl.recommender.util.compare_models(test_data, [baseline, gridsearch, high_filter, onezero] )
# gl.show_comparison(model_comp, [m1, m2, baseline, gridsearch, high_filter, onezero])
# Show an interactive view
# view = model.views.evaluate(test_data)
# view.show()
#
# Explore predictions
# view = model.views.explore(item_data=items,item_name_column='wine_name')
#
# Explore evals
# view = model.views.overview(validation_set=test_data,item_data=items,item_name_column='wine_name')
# view.show()
# NOTE(review): the triple-quote below opens a block comment/string whose
# closing delimiter lies beyond this chunk of the source.
'''
# Item-item collaborative-filtering recommenders with two more similarity
# measures.  ``train_data``, ``test_data``, ``popularity_model`` and the
# pearson variant ``item_sim_pearson`` are defined earlier in the file.
item_sim_cosine= graphlab.item_similarity_recommender.create(train_data, user_id='user_id', item_id='movie_id', target='rating', similarity_type='cosine')
item_sim_jaccard = graphlab.item_similarity_recommender.create(train_data, user_id='user_id', item_id='movie_id', target='rating', similarity_type='jaccard')
#Make Recommendations: top-5 movies for users 1..5 from each model.
print("\n Collaborative Filtering Model(pearson)")
item_sim_recomm = item_sim_pearson.recommend(users=range(1,6),k=5)
item_sim_recomm.print_rows(num_rows=25)
print("\n Collaborative Filtering Model(cosine)")
item_sim_recomm1 = item_sim_cosine.recommend(users=range(1,6),k=5)
item_sim_recomm1.print_rows(num_rows=25)
print("\n Collaborative Filtering Model(jaccard)")
item_sim_recomm2 = item_sim_jaccard.recommend(users=range(1,6),k=5)
item_sim_recomm2.print_rows(num_rows=25)
#graphlab.item_similarity_recommender.compare_models(train_data, [popularity_model, item_sim_model,item_sim_model1,item_sim_model2],metric='precision_recall')
#Evaluating Recommendation Engines
#Lets compare both the models built till now based on precision-recall characteristics:
model_performance = graphlab.compare(test_data, [popularity_model, item_sim_pearson,item_sim_cosine,item_sim_jaccard])
graphlab.show_comparison(model_performance,[popularity_model, item_sim_pearson,item_sim_cosine,item_sim_jaccard])
# factorization method
#fact_rec_model=graphlab.recommender.factorization_recommender.create(train_data, user_id='user_id', item_id='movie_id', target='rating', user_data=None, item_data=None, num_factors=8, regularization=1e-08, linear_regularization=1e-10, side_data_factorization=True, nmf=False, binary_target=False, max_iterations=50, sgd_step_size=0, random_seed=0, solver='auto', verbose=True)
#Make Recommendations:
#fact_sim_recomm = fact_rec_model.recommend(users=range(1,6),k=5)
#fact_sim_recomm.print_rows(num_rows=25)
# NOTE(review): set_target('ipynb') normally needs to run *before* show()
# for inline notebook rendering -- confirm the intended order here.
graphlab.canvas.show()
graphlab.canvas.set_target('ipynb')
popularity_recomm.print_rows(num_rows=25) #Train Model(pearson) item_sim_model = graphlab.item_similarity_recommender.create( train_data, user_id='user_id', item_id='anime_id', target='rating', similarity_type='pearson') item_sim_recomm = item_sim_model.recommend(users=range(1, 6), k=5) item_sim_recomm.print_rows(num_rows=25) #Campare popular model vs. pearson model model_performance = graphlab.compare(test_data, [popularity_model, item_sim_model]) graphlab.show_comparison(model_performance, [popularity_model, item_sim_model]) #Train Model(Jaccard) item_sim_model2 = graphlab.item_similarity_recommender.create( train_data, user_id='user_id', item_id='anime_id', target='rating', similarity_type='jaccard') item_sim_recomm2 = item_sim_model2.recommend(users=range(1, 6), k=5) item_sim_recomm2.print_rows(num_rows=25) #campare 3 models model_performance3 = graphlab.compare( test_data, [popularity_model, item_sim_model, item_sim_model2]) graphlab.show_comparison(model_performance3,
# In[19]:

# Items most similar to a given song under the personalized (item-item) model.
personalized_model.get_similar_items(
    ['Chan Chan (Live) - Buena Vista Social Club'])

# # Quantitative comparison between the models
#
# We now formally compare the popularity and the personalized models using
# precision-recall curves.

# In[20]:

# FIX(review): the original test was ``graphlab.version[:3] >= "1.6"`` -- a
# lexicographic comparison of the first three characters, which misclassifies
# versions such as "1.10" ("1.1" < "1.6" as strings).  Compare a numeric
# (major, minor) tuple instead, falling back to the legacy code path when the
# version string cannot be parsed.
try:
    _gl_version = tuple(int(part) for part in graphlab.version.split('.')[:2])
except ValueError:
    _gl_version = (0, 0)
if _gl_version >= (1, 6):
    # Newer GraphLab: built-in comparison view on a 5% user sample.
    model_performance = graphlab.compare(
        test_data, [popularity_model, personalized_model], user_sample=0.05)
    graphlab.show_comparison(model_performance,
                             [popularity_model, personalized_model])
else:
    # Older GraphLab: matplotlib-based precision-recall curves.
    get_ipython().magic(u'matplotlib inline')
    model_performance = graphlab.recommender.util.compare_models(
        test_data, [popularity_model, personalized_model], user_sample=.05)

# The curve shows that the personalized model provides much better performance.

# In[41]:

# Number of distinct users who listened to Kanye West.
west = song_data[song_data['artist'] == 'Kanye West']
count_west = len(west['user_id'].unique())
count_west

# In[42]:
# Keep only reviews that have an in-flight entertainment rating.
# NOTE(review): ``rating_ent`` is never used below -- the train/test split
# reads from ``rating_all`` (defined earlier).  Confirm whether the split was
# meant to use ``rating_ent`` instead.
rating_ent = airline_us.dropna(subset=['inflight_entertainment_rating'])
# Positional split: first 4000 rows train, remainder test.
train = rating_all[:4000]
test = rating_all[4000:]
# convert tables into SFrames using graphlab
train_data = graphlab.SFrame(train)
test_data = graphlab.SFrame(test)
# A Simple Popularity Model
popularity_model = graphlab.popularity_recommender.create(train_data, user_id='author', item_id='airline_name', target='overall_rating')
#Get recommendations for first 5 users and print them
#k=5 specifies top 5 recommendations to be given
popularity_recomm = popularity_model.recommend(k=5)
popularity_recomm.print_rows(num_rows=25)
# verified by checking the airlines with highest mean rating
train.groupby(by='airline_name')['overall_rating'].mean().sort_values(ascending=False)
# A Collaborative Filtering Model
#Train Model
item_sim_model = graphlab.item_similarity_recommender.create(train_data, user_id='author', item_id='airline_name', target='overall_rating', similarity_type='pearson')
#Make Recommendations:
item_sim_recomm = item_sim_model.recommend(k=5)
item_sim_recomm.print_rows(num_rows=25)
# Precision-recall comparison of the two recommenders on the held-out data.
model_performance = graphlab.compare(test_data, [popularity_model, item_sim_model])
graphlab.show_comparison(model_performance,[popularity_model, item_sim_model])
ratings_test = pd.read_csv('ml-100k/ub.test', sep='\t', names=r_cols, encoding='latin-1') #print ratings_base.shape #print ratings_test.shape train_data = graphlab.SFrame(ratings_base) test_data = graphlab.SFrame(ratings_test) #print ratings_base.groupby(by='movie_id')['rating'].mean().sort_values(ascending=False).head(20) item_sim_model = graphlab.item_similarity_recommender.create( train_data, user_id='user_id', item_id='movie_id', target='rating', similarity_type='pearson') item_sim_recomm = item_sim_model.recommend(users=range(1, 100), k=20) item_sim_recomm.print_rows(num_rows=2000) model_performance = graphlab.compare(test_data, [item_sim_model]) graphlab.show_comparison(model_performance, [item_sim_model])
def compare_models(test_data, m1, m2):
    """Evaluate two recommender models against *test_data* and render
    GraphLab's interactive comparison view for them."""
    candidates = [m1, m2]
    performance = graphlab.compare(test_data, candidates)
    graphlab.show_comparison(performance, candidates)
# Item-item collaborative filtering (cosine similarity) on MovieLens ratings.
# ``training_data``, ``r_cols`` and ``popularity_movie_model`` are defined
# earlier in the file.
recommender_movie_model = graphlab.item_similarity_recommender.create(
    training_data, user_id='user_id', item_id='movie_id', target='rating',
    similarity_type='cosine')
# Top-6 recommendations for users 1..4.
recommend_movie = recommender_movie_model.recommend(users=range(1, 5), k=6)
recommend_movie.print_rows(num_rows=24)
# To check for user's recommendation
recommend_movie = recommender_movie_model.recommend(
    users=[946], k=6)  # Replace "946" with your user_id
recommend_movie.print_rows(num_rows=6)
# Evaluation of our model
rating_test_data = pd.read_csv('ml-100k/ua.test', sep='\t', names=r_cols,
                               encoding='latin-1')
test_data = graphlab.SFrame(rating_test_data)
# print rating_test_data.shape
# Precision-recall comparison of the popularity and similarity recommenders.
evaluation = graphlab.compare(
    test_data, [popularity_movie_model, recommender_movie_model])
graphlab.show_comparison(evaluation,
                         [popularity_movie_model, recommender_movie_model])
"""
Movie recommendation system
# Reference: https://www.analyticsvidhya.com/blog/2016/06/quick-guide-build-recommendation-engine-python/
"""
# In[ ]:

# Items most similar to a given song under the personalized (item-item) model.
personalized_model.get_similar_items(["Chan Chan (Live) - Buena Vista Social Club"])

# #Quantitative comparison between the models
#
# We now formally compare the popularity and the personalized models using
# precision-recall curves.

# In[ ]:

# FIX(review): ``graphlab.version[:3] >= "1.6"`` compared the first three
# characters lexicographically, which misclassifies versions like "1.10"
# ("1.1" < "1.6" as strings).  Use a numeric (major, minor) tuple instead,
# falling back to the legacy path when the version string cannot be parsed.
try:
    _gl_version = tuple(int(part) for part in graphlab.version.split(".")[:2])
except ValueError:
    _gl_version = (0, 0)
if _gl_version >= (1, 6):
    # Newer GraphLab: built-in comparison view on a 5% user sample.
    model_performance = graphlab.compare(test_data, [popularity_model, personalized_model], user_sample=0.05)
    graphlab.show_comparison(model_performance, [popularity_model, personalized_model])
else:
    # Older GraphLab: matplotlib-based precision-recall curves.
    get_ipython().magic(u"matplotlib inline")
    model_performance = graphlab.recommender.util.compare_models(
        test_data, [popularity_model, personalized_model], user_sample=0.05
    )

# The curve shows that the personalized model provides much better performance.

# In[21]:

# Item-item model with no target rating (implicit feedback on song plays).
item_similarity_model = graphlab.item_similarity_recommender.create(train_data, user_id="user_id", item_id="song_id")

# In[22]:
TestRatings = pd.read_csv('Data/ml-100k/ua.test', sep='\t', names=ratingsCol, encoding='latin-1') #TestRatings.shape #TrainRatings.shape import graphlab as gl trainData = gl.SFrame(TrainRatings) testData = gl.SFrame(TestRatings) popModel = gl.popularity_recommender.create(trainData, user_id='user_id', item_id='movie_id', target='rating') recommendedMovies = popModel.recommend(users=range(1, 6), k=5) recommendedMovies.print_rows(num_rows=25) itemItemCF = gl.item_similarity_recommender.create( trainData, user_id='user_id', item_id='movie_id', target='rating', ) recommendedMovies2 = itemItemCF.recommend(users=range(1, 6), k=5) recommendedMovies2.print_rows(num_rows=25) performanceComparasion = gl.compare(testData, [popModel, itemItemCF]) gl.show_comparison(performanceComparasion, [popModel, itemItemCF])
# every user gets the same recommendation, based on popularity of the product # personalised model personalised_model = gl.item_similarity_recommender.create(train_data,user_id='user_id',item_id='song') # predictions personalised_model.recommend(users=[users[0]]) personalised_model.recommend(users=[users[1]]) # similar items personalised_model.get_similar_items(['With Or Without You - U2']) # recommender models comparison model_performance = gl.compare(test_data, [popularity_model, personalised_model], user_sample=0.05) gl.show_comparison(model_performance,[popularity_model, personalised_model]) # assignment # unique listeners to certain artists len(song_data[song_data['artist'] == 'Kanye West']['user_id'].unique()) len(song_data[song_data['artist'] == 'Foo Fighters']['user_id'].unique()) len(song_data[song_data['artist'] == 'Taylor Swift']['user_id'].unique()) len(song_data[song_data['artist'] == 'Lady GaGa']['user_id'].unique()) # most and least popular artist aggregated = song_data.groupby(key_columns='artist', operations={'total_count': gl.aggregate.SUM('listen_count')}) aggregated.sort('total_count', ascending=False) # most popular aggregated.sort('total_count') # least popular # most recommended song
# Movie-ratings recommender demo: train two recommenders on an 80/20 split of
# the Turi movie-ratings dataset and compare them on the held-out portion.
import graphlab

data = graphlab.SFrame.read_csv(
    "https://static.turi.com/datasets/movie_ratings/training_data.csv",
    column_type_hints={"rating": int})

# Deterministic 80/20 train/test split (fixed seed for reproducibility).
data_train, data_test = data.random_split(.8, seed=5)

# Automatically-chosen recommender vs. a popularity baseline.
model1 = graphlab.recommender.create(
    data_train, user_id="user", item_id="movie", target="rating")
model2 = graphlab.popularity_recommender.create(
    data_train, user_id="user", item_id="movie", target="rating")

# Precision-recall comparison of both models on the held-out split.
models = [model1, model2]
model_performance = graphlab.compare(data_test, models)
graphlab.show_comparison(model_performance, models)