def main(): # Load data and parse it: # mymovies = data.load_dat("movies.dat") # myratings = data.load_dat("ratings.dat") # Dump the parsed data to pickles into the file system: # data.dump_pickle(mymovies, generate_file_name("movies", "pkl")) # data.dump_pickle(myratings, generate_file_name("ratings", "pkl")) # Load the pickles (much faster than loading and parsing again the raw data): movies_pkl = data.load_pickle(MOVIE_PICKLE_LOCATION) ratings_pkl = data.load_pickle(RATINGS_PICKLE_LOCATION) # Question 1: print "Question 1: simple product association 1 and 1064 = ", calculate_simple_association( 1, 1064) # Question 2: print "Question 2: advanced product association 1 and 1064 = ", calculate_advanced_association( 1, 1064) # Question 4: print "Question 4: simple product association 1 and 2858 =", calculate_simple_association( 1, 2858) # Question 5: print "Question 5: title and genre (1, 1064, 2858)." for id in [1, 1064, 2858]: title, genre = utils.get_title_genre(id, movies_pkl) print "\t-", id, "=", title, "::", genre # Question 7: print "Question 7: advanced product association 1 and 2858 = ", calculate_advanced_association( 1, 2858) # Question 9: print "Question 9: top 10 most rated movies; provide ID, number of users who rated and title." top10_movies, top10_ratings, top10_movieids = topN_most_rated_movies(10) for i in range(len(top10_movies)): print "\tID:", top10_movieids[i], ":: Num. users:", top10_ratings[ i], ":: Title:", top10_movies[i] # Question 10: print "Question 10: top 5 movies with highest simple association with movie 3941; provide ID, value and title." topn = topN_movies_simple_association(3941, 5) for item in topn: print "\t", item # Question 11: print "Question 11: top 5 movies with highest advanced association with movie 3941; provide ID, value and title." 
topn = topN_movies_advanced_association(3941, 5) for item in topn: print "\t", item # Question 14: print "Question 14: top 10 most rated movies with at least 4 stars; provide ID, number of users who rated and title." top10_movies, top10_ratings, top10_movieids = topN_most_rated_movies(10, 4) for i in range(len(top10_movies)): print "\tID:", top10_movieids[i], ":: Num. users:", top10_ratings[ i], ":: Title:", top10_movies[i]
def calculate_advanced_association(movieX, movieY, ratings=None):
    """
    Calculates the advanced association value for movieX with respect movieY.

    The value is the ratio between P(Y|X) and P(Y|not X), estimated from the
    rating counts.

    :param movieX: ID of movie X
    :param movieY: ID of movie Y
    :param ratings: collection of all ratings. If available at calling time, then it can be
    used, otherwise it will be locally retrieved.
    :return: the value computed by the advanced association (0.0 when undefined)
    """
    if ratings is None:
        # Only the ratings pickle is required; the movies data-set is not needed here.
        ratings = data.load_pickle(RATINGS_PICKLE_LOCATION)

    # Reuse the globally cached count for movie X when a batch (topN) job has set it;
    # otherwise count it from the ratings collection.
    # NOTE(review): the cache carries no movie ID, so it is assumed to correspond to
    # this same movieX -- confirm callers reset it between different query movies.
    x_count = globals.AMOUNT_RATED_X
    if x_count is None:
        x_count = utils.how_many_Z(movieX, ratings)
    y_count = utils.how_many_Z(movieY, ratings)
    both_count = utils.how_many_X_and_Y(movieX, movieY, ratings)

    not_x = len(ratings) - x_count
    y_without_x = y_count - both_count

    try:
        return (float(both_count) / x_count) / (float(y_without_x) / not_x)
    except ZeroDivisionError:
        # Association undefined (no ratings for X, or no Y-without-X ratings):
        # report 0.0, matching the original best-effort behaviour.
        return 0.0
def topN_movies_advanced_association(movieX_ID, N=10): """ Retrieves a list of movie IDs with the highest advanced association value with respect movieX. In case of a tie, the movie with the higher ID is ranked before the movie with lower ID. :param movieX_ID: ID of movie X :param N: number of movies to put in the returned list (topN) :return: a list of tuples with movie ID, association value and title """ # First get the data (preferably from pickles): movies = data.load_pickle( MOVIE_PICKLE_LOCATION) # Ratings data-set is not needed in this case. ratings = data.load_pickle(RATINGS_PICKLE_LOCATION) # To SPEED UP the execution of the algorithm and avoid repetitive jobs/tasks, we will obtain/retrieve now: # - How many times movie X was rated # - A collection of all ratings indexed by user # Normally you would obtain this info in another more appropriate (inner) methods, but since we are now iterating # over all movies, there are some variables or data that is common to every iteration, thus we obtain it outside the # loop. These are global variables that can be consulted from other modules or Python files. globals.AMOUNT_RATED_X = utils.how_many_Z(movieX_ID, ratings) globals.RATINGS_BY_USER = utils.extract_ratings_by_user(ratings) # Now, we will iterate over all movies to calculate their respective simple association value given the movieX ID. 
aa_values = [ ] # A list of tuples with the following form: "(movieID, association value)" for i, movieY_ID in enumerate(movies.keys()): # movies.keys()[:100] aa_values.append( (int(movieY_ID), calculate_advanced_association(movieX_ID, movieY_ID, ratings))) # print "(i=", i, ")Appended movie: ", movieY_ID mysorted = sorted( aa_values, key=operator.itemgetter(1, 0), reverse=True) # tuples sorted from BIG to SMALL association value mysorted = mysorted[ 1:N + 1] # we are interested just in the top N tuples, except the query movie itself topN = [] for elem in mysorted: topN.append((elem[0], elem[1], movies[str(elem[0])]['title'])) return topN
def test(): """ This function is used as a test-bed. Just for TESTING purposes. It shouldn't be used for production code. :return: """ print "HelloWorldTEST!" # ------------------------------- # Testing library against Sugestio API: # client = sugestio.Client(ACCOUNT, SECRET) # status, content = client.get_recommendations(1, 5) # if status == 200: # print("Title\tScore") # for recommendation in content: # print(recommendation.item.title + "\t" + str(recommendation.score)) # else: # print("server response code:", status) # ------------------------------- # Testing own implemented methods: # Data: movies_pkl = data.load_pickle(MOVIE_PICKLE_LOCATION) ratings_pkl = data.load_pickle(RATINGS_PICKLE_LOCATION) # Testing methods: submit_metadata_single_movie(83829, movies_pkl['83829']) submit_rating_single_movie(8, ratings_pkl[3500]) update_rating_single_movie(8, ratings_pkl[200]) s, recommendations = topN_recommendations_user(1, N=5) for i, rec in enumerate(recommendations): print "\t\t Recommendation#", i, ": ", rec s, consumptions = rating_history_user(1) for i, con in enumerate(consumptions): print "\t\t Consumptions#", i, ": ", con s, mov = get_metadata_movie(2) print "Movie details: ", mov submit_movies_metadata_bulk(movies_pkl.values()) submit_movies_ratings_bulk(ratings_pkl)
def test(): """ This function is used as a test-bed. Just for TESTING purposes. It shouldn't be used for production code. :return: """ # Load data and parse it: # mymovies = data.load_dat("movies.dat") # myratings = data.load_dat("ratings.dat") # Dump the parsed data to pickles into the file system: # data.dump_pickle(mymovies, generate_file_name("movies", "pkl")) # data.dump_pickle(myratings, generate_file_name("ratings", "pkl")) # Load the pickles (much faster than loading and parsing again the raw data): movies_pkl = data.load_pickle(MOVIE_PICKLE_LOCATION) ratings_pkl = data.load_pickle(RATINGS_PICKLE_LOCATION) print "len(movies_pkl): ", len(movies_pkl) print "len(ratings_pkl): ", len(ratings_pkl) # print movies_pkl['3196'] # print ratings_pkl[0] # Plot top10 rated movies with/without stars: top10_movies, top10_ratings, top10_movieids = topN_most_rated_movies(10) utils.plot_top10_rated_distribution(top10_movies, top10_ratings)
def topN_most_rated_movies(N=10, stars=None, movies=None, ratings=None):
    """
    Retrieves the titles, rating counts and IDs of the N most rated movies,
    ordered from most to least rated.

    :param N: number of movies to put in the returned lists (topN)
    :param stars: if given (integer), only ratings of at least `stars` stars
    count towards a movie's total
    :param movies: optional pre-loaded movies collection; loaded from the pickle
    when omitted (keeps the original call signature backward-compatible)
    :param ratings: optional pre-loaded ratings collection; loaded from the
    pickle when omitted
    :return: a list of (most rated) movie titles, a list of their rating counts
    and a list of their movie IDs. All three ordered from BIG to SMALL.
    """
    # First, let's load the data (unless the caller already supplied it):
    if movies is None:
        movies = data.load_pickle(MOVIE_PICKLE_LOCATION)
    if ratings is None:
        ratings = data.load_pickle(RATINGS_PICKLE_LOCATION)

    # Aggregate/count the qualifying ratings of every movie:
    aggregated_ratings = {}
    for elem in ratings:
        # BUG FIX: the original condition was `>= stars: continue`, which skipped
        # the HIGH ratings and counted only the low ones -- the opposite of what
        # callers ask for ("top 10 most rated movies with at least 4 stars").
        # Skip the ratings BELOW the threshold instead.
        if stars is not None and int(elem['rating']) < stars:
            continue
        aggregated_ratings[elem['movieid']] = aggregated_ratings.get(elem['movieid'], 0) + 1

    # Sort from BIG to SMALL rating count and keep the first N entries:
    mysorted = sorted(aggregated_ratings.items(),
                      key=operator.itemgetter(1),
                      reverse=True)[:N]

    # Collect IDs, titles and counts as parallel lists (same order):
    topN_movieIDs = [elem[0] for elem in mysorted]
    topN_movies = []
    topN_ratings = []
    for movie_id, count in mysorted:
        topN_movies.append(movies[movie_id]['title'])
        topN_ratings.append(count)
    return topN_movies, topN_ratings, topN_movieIDs
def test(): """ This function is used as a test-bed. Just for TESTING purposes. It shouldn't be used for production code. :return: """ print "HelloWorldTEST!" movies_pkl = data.load_pickle(MOVIE_PICKLE_LOCATION) ratings_pkl = data.load_pickle(RATINGS_PICKLE_LOCATION) print "len(movies_pkl): ", len(movies_pkl) print "len(ratings_pkl): ", len(ratings_pkl) globals.RATINGS_BY_USER = utils.extract_ratings_by_users(ratings_pkl) globals.RATINGS_BY_USER_MAP = utils.extract_ratings_by_users_map(ratings_pkl) # globals.RATINGS_X_BY_USERS = utils.extract_ratings_x_by_users(movies_pkl, ratings_pkl) # globals.MEAN_RATINGS_ITEM = utils.extract_mean_ratings(movies_pkl) globals.RATINGS_X_BY_USERS = data.load_pickle(RATINGS_X_BY_USERS_PATH) # load pickle globals.MEAN_RATINGS_ITEM = data.load_pickle(MEAN_RATINGS_ITEM_PATH) # load pickle globals.SIMILARITY_TYPE = globals.SimilarityType() print "len(RATINGS_BY_USER): ", len(globals.RATINGS_BY_USER) print "len(RATINGS_BY_USER_MAP): ", len(globals.RATINGS_BY_USER_MAP) print "len(RATINGS_X_BY_USERS): ", len(globals.RATINGS_X_BY_USERS) print "len(MEAN_RATINGS_ITEM): ", len(globals.MEAN_RATINGS_ITEM) print "SIMILARITY_TYPE.type(): ", globals.SIMILARITY_TYPE.type()
def calculate_simple_association(movieX, movieY, ratings=None):
    """
    Calculates the simple association value for movieX with respect movieY:
    count(X and Y) / count(X).

    :param movieX: ID of movie X
    :param movieY: ID of movie Y
    :param ratings: collection of all ratings. If available at calling time, then it can be
    used, otherwise it will be locally retrieved.
    :return: the value computed by the simple association; 0.0 when movie X has
    no ratings (consistent with calculate_advanced_association)
    """
    # First get the data (preferably by parameter otherwise from pickles):
    if ratings is None:
        ratings = data.load_pickle(
            RATINGS_PICKLE_LOCATION
        )  # Movies data-set not needed, just ratings.
    XY = float(utils.how_many_X_and_Y(movieX, movieY, ratings))
    # Reuse the globally cached count for movie X when a batch (topN) job has set it.
    # NOTE(review): the cache carries no movie ID, so it is assumed to correspond to
    # this same movieX -- confirm callers reset it between different query movies.
    X = globals.AMOUNT_RATED_X
    if X is None:
        X = utils.how_many_Z(movieX, ratings)
    try:
        return XY / X
    except ZeroDivisionError:
        # FIX: previously an unhandled ZeroDivisionError was raised when nobody
        # rated movie X. Return 0.0 instead, matching the best-effort behaviour
        # of calculate_advanced_association.
        return 0.0
# NOTE(review): this chunk was recovered with its original line structure lost.
# The four statements below reference `collection` and `out`, which are not
# defined at this level -- they read like the tail of an earlier
# make_*_webpage-style function whose `def` line lies outside this chunk.
# Confirm their enclosing scope against the full file.
print_attribute("ambitus", collection, out)
out.write(rst_header("Pickup", 3))
out.write("1 if has pickup\n\n")
print_attribute("has_pickup", collection, out)


def make_intervals_webpage(alist):
    # Writes the "Intervals" page header to the docs tree.
    # NOTE(review): `alist` is unused here -- the body appears truncated in this
    # copy of the source; confirm against the full file.
    with codecs.open("../docs/intervals.rst", 'w', encoding="utf-8") as out:
        out.write(rst_header("Intervals", 1))


if __name__ == '__main__':
    # Pre-pickled song collections:
    brasil = data.load_pickle("brasil")
    europa = data.load_pickle("europa")
    # Time signatures grouped by meter class:
    binary = ('2/2', '2/4', '2/8', '4/2', '4/4', '4/8', '6/8', '6/16', '12/8')
    ternary = ('3/4', '3/8', '9/8')
    # alist = intervals.list_songs_with_dissonant_intervals(brasil)
    # make_dissonant_intervals_webpage(alist)
    # make_basic_attributes_webpage((("Brasil", brasil), ("Europa", europa)))
    make_intervals_webpage(None)
    # NOTE(review): the call below is cut off mid-argument in this copy of the
    # source; the rest of the dict literal and the closing parenthesis lie
    # beyond the recovered chunk.
    make_contours_webpage({
        "Brasil": brasil,
        "Europa": europa,
def main():
    """Entry point: prints the answers to the Sugestio-service assignment questions.

    Most answer code is deliberately commented out: the calls hit a remote
    recommendation service, so each question was enabled once, its output
    recorded, and the code commented back out to avoid re-submitting data on
    every run. The active `print` statements keep the question headers visible.
    """
    # Load data and parse it:
    # mymovies = data.load_dat("movies.csv")
    # myratings = data.load_dat("ratings.csv")

    # Dump the parsed data to pickles into the file system:
    # data.dump_pickle(mymovies, data.generate_file_name("movies", "pkl"))
    # data.dump_pickle(myratings, data.generate_file_name("ratings", "pkl"))

    # Load the pickles (much faster than loading and parsing again the raw data):
    movies_pkl = data.load_pickle(MOVIE_PICKLE_LOCATION)
    ratings_pkl = data.load_pickle(RATINGS_PICKLE_LOCATION)
    print "len(movies_pkl): ", len(movies_pkl)
    print "len(ratings_pkl): ", len(ratings_pkl)

    # One-time bulk upload of the data-set to the service:
    # submit_movies_metadata_bulk(movies_pkl.values())
    # submit_movies_ratings_bulk(ratings_pkl)
    #
    # Question 1:
    print "Question 1: rating of movie 1125 by user 289."
    # s, rating = utils.get_rating(289, 1125)
    # print "\t\t| rating =", utils.decode_stars(rating[0].detail)
    # print "\t\t| date & time =", rating[0].date
    # s, metadata = get_metadata_movie(1125)
    # print "\t\t| title =", metadata.title
    # print "\t\t| genre(s) =",
    # for genre in metadata.category:
    #     print genre,
    # print ""  # this print is just for readability
    #
    # Question 3:
    print "\nQuestion 3: rating history user 249."
    # s, history = rating_history_user(249)
    # rating_stars = []
    # for rating in history:
    #     s, metadata = get_metadata_movie(rating.itemid)
    #     genres = ""
    #     for genre in metadata.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rating.itemid, ":: Title =", metadata.title, ":: Genre(s) =", genres, ":: Rating =", utils.decode_stars(rating.detail)
    #     rating_stars.append(utils.decode_stars(rating.detail))
    # print "\t\t| Mean =", mean(rating_stars)
    # print "\t\t| Median =", median(rating_stars)
    # print "\t\t| Uniqueness =", dict(zip(*unique(rating_stars, return_counts=True)))
    #
    # Question 4:
    print "\nQuestion 4: top five collaborative filtering recommendations user 249."
    # s, top5 = topN_recommendations_user(249, 5)
    # for rec in top5:
    #     genres = ""
    #     for genre in rec.item.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rec.item.id, ":: Title =", rec.item.title, ":: Genre(s) =", genres, ":: Score =", rec.score
    #
    # Question 5:
    print "\nQuestion 5: rating history user 35."
    # s, history = rating_history_user(35)
    # rating_stars = []
    # for rating in history:
    #     s, metadata = get_metadata_movie(rating.itemid)
    #     genres = ""
    #     for genre in metadata.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rating.itemid, ":: Title =", metadata.title, ":: Genre(s) =", genres, ":: Rating =", utils.decode_stars(rating.detail)
    #     rating_stars.append(utils.decode_stars(rating.detail))
    # print "\t\t| Mean =", mean(rating_stars)
    # print "\t\t| Median =", median(rating_stars)
    # print "\t\t| Uniqueness =", dict(zip(*unique(rating_stars, return_counts=True)))
    #
    # Question 6:
    print "\nQuestion 6: top five collaborative filtering recommendations user 35."
    # s, top5 = topN_recommendations_user(35, 5)
    # for rec in top5:
    #     genres = ""
    #     for genre in rec.item.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rec.item.id, ":: Title =", rec.item.title, ":: Genre(s) =", genres, ":: Score =", rec.score
    #
    # Question 8:
    print "\nQuestion 8: top five content based recommendations user 249."
    # s, top5 = topN_recommendations_user(249, 5)
    # for rec in top5:
    #     genres = ""
    #     for genre in rec.item.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rec.item.id, ":: Title =", rec.item.title, ":: Genre(s) =", genres, ":: Score =", rec.score
    #
    # Question 9:
    print "\nQuestion 9: top five content based recommendations user 35."
    # s, top5 = topN_recommendations_user(35, 5)
    # for rec in top5:
    #     genres = ""
    #     for genre in rec.item.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rec.item.id, ":: Title =", rec.item.title, ":: Genre(s) =", genres, ":: Score =", rec.score

    # Question 10a:
    print "\nQuestion 10a: additional raitings for new user 1000."
    # additional_ratings = [
    #     {'userid': 1000, 'movieid': 1590, 'rating': 4.0, 'timestamp': 1476640644},
    #     {'userid': 1000, 'movieid': 1196, 'rating': 4.5, 'timestamp': 1476640644},
    #     {'userid': 1000, 'movieid': 4878, 'rating': 4.0, 'timestamp': 1476640644},
    #     {'userid': 1000, 'movieid': 589, 'rating': 4.5, 'timestamp': 1476640644},
    #     {'userid': 1000, 'movieid': 480, 'rating': 4.5, 'timestamp': 1476640644}
    # ]
    # submit_movies_ratings_bulk(additional_ratings)  # Make sure to execute this once and at the correct moment

    # Question 10b:
    print "\nQuestion 10b: rating history user 1000."
    # s, history = rating_history_user(1000)
    # for rating in history:
    #     s, metadata = get_metadata_movie(rating.itemid)
    #     genres = ""
    #     for genre in metadata.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rating.itemid, ":: Title =", metadata.title, ":: Genre(s) =", genres, ":: Rating =", utils.decode_stars(rating.detail)

    # Question 12:
    print "\nQuestion 12: top 10 collaborative filtering recommendations user 1000."
    # s, top10 = topN_recommendations_user(1000, 10)
    # for rec in top10:
    #     genres = ""
    #     for genre in rec.item.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rec.item.id, ":: Title =", rec.item.title, ":: Genre(s) =", genres, ":: Score =", rec.score

    # Question 14:
    print "\nQuestion 14: top 10 content based recommendations user 1000."
    # s, top10 = topN_recommendations_user(1000, 10)
    # for rec in top10:
    #     genres = ""
    #     for genre in rec.item.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rec.item.id, ":: Title =", rec.item.title, ":: Genre(s) =", genres, ":: Score =", rec.score

    # Question 15:
    print "\nQuestion 15: additional rating by user 1000."
    # s = submit_rating_single_movie(6587, {'userid': 1000, 'movieid': 6587, 'rating': 1.0, 'timestamp': 1476640644})
    # s, metadata = get_metadata_movie(6587)
    # genres = ""
    # for genre in metadata.category:
    #     if genres is "":
    #         genres = "{0}".format(genre)
    #     else:
    #         genres = "{0}, {1}".format(genres, genre)
    # print "\t\t| Movie title =", metadata.title, ":: Genre(s) =", genres

    # Question 17:
    print "\nQuestion 17: top 10 content based recommendations user 1000."
    # s, top10 = topN_recommendations_user(1000, 10)
    # for rec in top10:
    #     genres = ""
    #     for genre in rec.item.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rec.item.id, ":: Title =", rec.item.title, ":: Genre(s) =", genres, ":: Score =", rec.score

    # Question 18:
    print "\nQuestion 18: delete all consumptions user 1000."
def main(): # Load data and parse it: # mymovies = data.load_dat("movies.csv") # myratings = data.load_dat("ratings.csv") # Dump the parsed data to pickles into the file system: # data.dump_pickle(mymovies, data.generate_file_name("movies", "pkl")) # data.dump_pickle(myratings, data.generate_file_name("ratings", "pkl")) # Load the pickles (much faster than loading and parsing again the raw data): movies_pkl = data.load_pickle(MOVIE_PICKLE_LOCATION) ratings_pkl = data.load_pickle(RATINGS_PICKLE_LOCATION) print "len(movies_pkl): ", len(movies_pkl) print "len(ratings_pkl): ", len(ratings_pkl) # Setting some global variables and general information: globals.RATINGS_BY_USER = utils.extract_ratings_by_users(ratings_pkl) globals.RATINGS_BY_USER_MAP = utils.extract_ratings_by_users_map(ratings_pkl) globals.RATINGS_X_BY_USERS = utils.extract_ratings_x_by_users(movies_pkl, ratings_pkl) globals.MEAN_RATINGS_ITEM = utils.extract_mean_ratings(movies_pkl) # globals.RATINGS_X_BY_USERS = data.load_pickle(RATINGS_X_BY_USERS_PATH) # Loading the pickle is faster than calculating it in run-time # globals.MEAN_RATINGS_ITEM = data.load_pickle(MEAN_RATINGS_ITEM_PATH) # Loading the pickle is faster than calculating it in run-time globals.SIMILARITY_TYPE = globals.SimilarityType() print "len(RATINGS_BY_USER): ", len(globals.RATINGS_BY_USER) print "len(RATINGS_BY_USER_MAP): ", len(globals.RATINGS_BY_USER_MAP) print "len(RATINGS_X_BY_USERS): ", len(globals.RATINGS_X_BY_USERS) print "len(MEAN_RATINGS_ITEM): ", len(globals.MEAN_RATINGS_ITEM) print "SIMILARITY_TYPE.type(): ", globals.SIMILARITY_TYPE.type() # Dump some globals (just for the first time): # data.dump_pickle(globals.RATINGS_X_BY_USERS, data.generate_file_name("RATINGS_X_BY_USERS", "pkl")) # data.dump_pickle(globals.MEAN_RATINGS_ITEM, data.generate_file_name("MEAN_RATINGS_ITEM", "pkl")) # Build IICF model: # build_model_iicf(movies_pkl, ratings_pkl) # Dump the IICF model to pickle into file system (just for the first time): # 
data.dump_pickle(globals.IICF_MODEL, data.generate_file_name("IICF-model", "pkl")) # Question 1: print "Question 1: Pearson correlation (without significance weighting) user 1 and 4." common_ratings, amount = utils.find_common_ratings(1, 4, None) p = calculate_pearson_correlation(common_ratings, 1, 4) print "\t| Result =", p # Question 2: print "Question 2: Pearson correlation (with significance weighting) user 1 and 4." common_ratings, amount = utils.find_common_ratings(1, 4, None) p = calculate_pearson_correlation(common_ratings, 1, 4) result = p * calculate_significance_weighing_factor(1,4,ratings_pkl,amount) print "\t| Result =", result # Question 5: print "Question 5: Amount of neighbors (strict positive similarity) user 1 and item 10." neighborsIDs, neighbors_data = top_k_most_similar_neighbors(1,10,1000) print "\t| # of neighbors = ", len(neighborsIDs) # Question 6: print "Question 6: List those neighbors." neighborsIDs, neighbors_data = top_k_most_similar_neighbors(1, 10) for item in neighborsIDs: print "\t| ID: ", item, " | Pearson: ", neighbors_data[item]['pearson'] # Question 7: print "Question 7: Weighted average of the deviation from mean rating. Neighbors of user 1 item 10." pre = rating_prediction_user(1,10,neighborsIDs,neighbors_data) print "\t| Result = ", pre # Question 8: print "Question 8: Rating prediction user 1 item 10." pre = rating_prediction_user(1,10,neighborsIDs,neighbors_data) print "\t| Result = ", pre # Question 9: print "Question 9: Title item 10." print "\t Title = ", movies_pkl['10']['title'] # Question 10: print "Question 10: Amount of neighbors (strict positive similarity) user 1 and item 260." neighborsIDs, neighbors_data = top_k_most_similar_neighbors(1,260,1000) print "\t| # of neighbors = ", len(neighborsIDs) # Question 11: print "Question 11: List those neighbors." 
neighborsIDs, neighbors_data = top_k_most_similar_neighbors(1, 260) for item in neighborsIDs: print "\t| ID: ", item, " | Pearson: ", neighbors_data[item]['pearson'] # Question 12: print "Question 12: Weighted average of the deviation from mean rating. Neighbors of user 1 item 260." pre = rating_prediction_user(1, 260, neighborsIDs, neighbors_data) print "\t| Result = ", pre # Question 13: print "Question 13: Rating prediction user 1 item 260." pre = rating_prediction_user(1, 260, neighborsIDs, neighbors_data) print "\t| Result = ", pre # Question 14: print "Question 14: Title item 260." print "\t Title = ", movies_pkl['260']['title'] # Question 16: print "Question 16: Top-N recommendations user 1." topn = topN_recommendations_uucf(1,movies_pkl) for item in topn: print "\t| ({0},{1},{2})".format(item[0], item[1], item[2]) # Question 17: print "Question 17: Top-N recommendations user 522." topn = topN_recommendations_uucf(522,movies_pkl) for item in topn: print "\t| ({0},{1},{2})".format(item[0], item[1], item[2]) # Question 19: print "Question 19: IICF model. Strict positive similarities." globals.IICF_MODEL = data.load_pickle(IICF_MODEL_NAME) # load the IICF model print "\t| Result = ", len(globals.IICF_MODEL[1]) # Question 20: print "Question 20: Cosine similarity between items 594 and 596." sim = globals.IICF_MODEL[1][594][596] print "\t| Similarity = ", sim print "\t| Movie 594 =", movies_pkl['594']['title'] print "\t| Movie 596 =", movies_pkl['596']['title'] # Question 21: print "Question 21: Rating prediction for user 522 and item 25. Similar neighbors rated by user:"******"\t| Result = ", len(result) # Question 22: print "Question 22: Top-k similar items for user 522 and item 25." globals.SIMILARITY_TYPE.setPositive() topk = top_k_most_similar_items(522, 25) for item in topk: print "\t| ({0} , {1} , {2})".format(item[0], movies_pkl[str(item[0])]['title'], item[1]) # Question 24: print "Question 24: Top-N recommendations for user 522." 
globals.SIMILARITY_TYPE.setPositive() topn = topN_recommendations_iicf(522, movies_pkl) for item in topn: print "\t| ( {0} , {1}, {2} )".format(item[0], item[1], item[2]) # Question 25: print "Question 25: Top-N recommendations basket items: [1]." globals.SIMILARITY_TYPE.setPositive() topn = topN_recommendations_basket([1], movies_pkl) for item in topn: print "\t| ( {0} , {1}, {2} )".format(item[0], item[1], item[2]) # Question 26: print "Question 26: Top-N recommendations basket items: [1, 48, 239]." globals.SIMILARITY_TYPE.setPositive() topn = topN_recommendations_basket([1, 48, 239], movies_pkl) for item in topn: print "\t| ( {0} , {1}, {2} )".format(item[0], item[1], item[2]) # Question 27: print "Question 27: Top-N recommendations basket items: [1, 48, 239] plus negative similarities." globals.SIMILARITY_TYPE.setBoth() topn = topN_recommendations_basket([1, 48, 239], movies_pkl) for item in topn: print "\t| ( {0} , {1}, {2} )".format(item[0], item[1], item[2]) # Question 29: print "Question 29: Top-N recommendations hybrid for user 522." globals.SIMILARITY_TYPE.setPositive() topn = topN_recommendations_hybrid(522,movies_pkl) for item in topn: print "\t| ( {0} , {1}, {2} )".format(item[0], item[1], item[2])