def loadMoviesFromServer(genre=None):
    """Open a cursor and assemble the ratings query, optionally filtered.

    NOTE(review): this definition stops after building the SQL text. It
    never executes the query and implicitly returns None.

    genre -- optional SQL column/expression compared against 1 to restrict
             the rows (presumably a per-genre flag column -- confirm);
             None means no restriction.
    """
    conn = projSql.get_sql_connection()
    data = conn.cursor()

    # Ratings live in two tables; UNION merges them before joining to movies.
    sql = (
        "SELECT m.id,r.userid,r.rating FROM movies AS m JOIN "
        "((SELECT * FROM reviewsdata) UNION (SELECT * FROM user_reviews)) AS r "
        "ON m.id = r.movieid"
    )
    if not (genre == None):
        sql = sql + " WHERE " + genre + " = 1"
def getRecommendedItems(user, genre=None):
    """Rank movies the user has not rated by similarity-weighted rating.

    For every movie the user rated, the stored similarities are fetched via
    getMovieSimilarities(); each unrated neighbour accumulates
    similarity * rating, which is then divided by the summed similarity.

    user  -- id matched against ``userid`` in the combined review tables.
    genre -- optional column name forwarded to getMovieSimilarities(), which
             embeds it in SQL text. It must therefore be a plain identifier,
             optionally qualified (``col`` or ``alias.col``).

    Returns a list of movie ids as strings, highest predicted score first.
    Raises ValueError if ``genre`` is not a valid identifier.
    """
    import re

    # Fail fast, before any database work: genre ends up inside SQL text and
    # cannot be bound as a parameter.
    if genre is not None and not re.match(
            r"(?:[A-Za-z_][A-Za-z0-9_]*\.)?[A-Za-z_][A-Za-z0-9_]*\Z", genre):
        raise ValueError("genre must be a plain column name, got %r" % (genre,))

    conn = projSql.get_sql_connection()
    data = conn.cursor()
    # Bind the user id instead of concatenating it into the statement.
    # NOTE(review): assumes the driver behind projSql uses the %s paramstyle
    # (as the MySQL drivers do) -- confirm.
    sql = (
        "SELECT r.movieid, r.rating FROM "
        "((SELECT * FROM reviewsdata) UNION (SELECT * FROM user_reviews)) AS r "
        "WHERE r.userid = %s"
    )
    data.execute(sql, (user,))

    # Items rated by this user (a later row for the same movie wins).
    userRatings = {}
    for movie, rating in data:
        userRatings[movie] = rating

    scores = {}
    totalSim = {}
    for movie, rating in userRatings.items():
        for movie2, similarity in getMovieSimilarities(movie, genre).items():
            # Ignore items the user has already rated.
            if movie2 in userRatings:
                continue
            # Weighted sum of rating times similarity, and the total weight.
            scores[movie2] = scores.get(movie2, 0) + similarity * rating
            totalSim[movie2] = totalSim.get(movie2, 0) + similarity

    # Weighted average per candidate. Zero scores are dropped as before; a
    # zero weight total is skipped too so the division cannot fail.
    rankings = [
        (score / totalSim[movie], movie)
        for movie, score in scores.items()
        if score != 0 and totalSim[movie] != 0
    ]
    rankings.sort(reverse=True)
    return [str(movie) for _, movie in rankings]
def getRecommendedItems(user, genre=None):
    """Recommend unrated movies, ordered by similarity-weighted rating.

    Each movie the user rated contributes similarity * rating to every
    neighbour returned by getMovieSimilarities(); the sum per neighbour is
    divided by the summed similarity to give its ranking score.

    user  -- id matched against ``userid`` in the combined review tables.
    genre -- optional column name passed on to getMovieSimilarities(), which
             splices it into SQL text, so only a plain identifier
             (``col`` or ``alias.col``) is accepted.

    Returns a list of movie ids as strings, best candidate first.
    Raises ValueError if ``genre`` is not a valid identifier.
    """
    import re

    # Validate before opening a connection: genre cannot be bound as a
    # parameter further down, so it has to be checked as text.
    if genre is not None and not re.match(
            r"(?:[A-Za-z_][A-Za-z0-9_]*\.)?[A-Za-z_][A-Za-z0-9_]*\Z", genre):
        raise ValueError("genre must be a plain column name, got %r" % (genre,))

    conn = projSql.get_sql_connection()
    data = conn.cursor()
    # The user id is bound by the driver rather than concatenated.
    # NOTE(review): assumes the %s paramstyle used by the MySQL drivers --
    # confirm against projSql.
    sql = (
        "SELECT r.movieid, r.rating FROM "
        "((SELECT * FROM reviewsdata) UNION (SELECT * FROM user_reviews)) AS r "
        "WHERE r.userid = %s"
    )
    data.execute(sql, (user,))

    # Everything this user has rated; the last row per movie wins.
    userRatings = {}
    for movie, rating in data:
        userRatings[movie] = rating

    scores = {}
    totalSim = {}
    for movie, rating in userRatings.items():
        neighbours = getMovieSimilarities(movie, genre)
        for movie2, similarity in neighbours.items():
            # Already rated by the user: not a recommendation candidate.
            if movie2 in userRatings:
                continue
            scores[movie2] = scores.get(movie2, 0) + similarity * rating
            totalSim[movie2] = totalSim.get(movie2, 0) + similarity

    # Divide each total by its weight. Candidates with a zero score are
    # omitted (original behaviour) and a zero weight is skipped so the
    # division cannot raise.
    rankings = [
        (score / totalSim[movie], movie)
        for movie, score in scores.items()
        if score != 0 and totalSim[movie] != 0
    ]
    rankings.sort(reverse=True)
    return [str(movie) for _, movie in rankings]
def getMovieSimilarities(movieid, genre=None):
    """Return ``{movie_id2: similarity}`` for rows stored under ``movieid``.

    Reads ``movie_similarities`` where ``movie_id1`` equals ``movieid``.
    With ``genre`` given, the rows are joined to ``movies`` on ``movie_id2``
    and kept only where that column equals 1.

    movieid -- value matched against ``movie_id1``.
    genre   -- optional column name. Column names cannot be bound as query
               parameters, so it is validated as a plain identifier
               (``col`` or ``alias.col``) before being placed in the SQL.

    Raises ValueError if ``genre`` is not a valid identifier.
    """
    import re

    # NOTE(review): %s placeholders assume the paramstyle of the MySQL
    # drivers -- confirm against projSql.
    if genre is None:
        sql = ("SELECT movie_id2, similarity FROM movie_similarities "
               "WHERE movie_id1 = %s")
    else:
        if not re.match(
                r"(?:[A-Za-z_][A-Za-z0-9_]*\.)?[A-Za-z_][A-Za-z0-9_]*\Z",
                genre):
            raise ValueError(
                "genre must be a plain column name, got %r" % (genre,))
        sql = ("SELECT movie_id2, similarity FROM movie_similarities as s "
               "JOIN movies as m on s.movie_id2 = m.id "
               "WHERE movie_id1 = %s AND " + genre + "=1")

    conn = projSql.get_sql_connection()
    data = conn.cursor()
    # The movie id is bound by the driver instead of being concatenated.
    data.execute(sql, (movieid,))

    movie_sim = {}
    for movie_id2, similarity in data:
        movie_sim[movie_id2] = similarity
    return movie_sim
def loadMoviesFromServer(genre=None):
    """Load all ratings as a nested ``{userid: {movieid: rating}}`` dict.

    Ratings are read from ``reviewsdata`` and ``user_reviews`` (merged with
    UNION) and joined to ``movies`` on the movie id.

    genre -- optional column name; when given, only rows where that column
             equals 1 are loaded. Column names cannot be bound as query
             parameters, so the value is validated as a plain identifier
             (``col`` or ``alias.col``) before it is placed in the SQL.

    Returns a dict mapping userid -> {movieid: float(rating)}.
    Raises ValueError if ``genre`` is not a valid identifier.
    """
    import re

    sql = (
        "SELECT m.id,r.userid,r.rating FROM movies AS m JOIN "
        "((SELECT * FROM reviewsdata) UNION (SELECT * FROM user_reviews)) AS r "
        "ON m.id = r.movieid"
    )
    if genre is not None:
        # Checked before a connection is opened, so bad input never reaches
        # the database.
        if not re.match(
                r"(?:[A-Za-z_][A-Za-z0-9_]*\.)?[A-Za-z_][A-Za-z0-9_]*\Z",
                genre):
            raise ValueError(
                "genre must be a plain column name, got %r" % (genre,))
        sql += " WHERE " + genre + " = 1"

    conn = projSql.get_sql_connection()
    data = conn.cursor()
    data.execute(sql)

    prefs = {}
    for movieid, userid, rating in data:
        # float() normalises whatever numeric type the driver returns.
        prefs.setdefault(userid, {})[movieid] = float(rating)
    return prefs
def loadMoviesFromServer(genre=None):
    """Fetch every rating and group it per user.

    The query joins ``movies`` to the UNION of ``reviewsdata`` and
    ``user_reviews`` on the movie id.

    genre -- optional column name used as ``WHERE <genre> = 1``. It is part
             of the SQL text (identifiers cannot be bound as parameters), so
             only a plain identifier, optionally qualified with an alias
             (``col`` or ``alias.col``), is accepted.

    Returns a dict of the form ``{userid: {movieid: float(rating)}}``.
    Raises ValueError if ``genre`` is not a valid identifier.
    """
    import re

    sql = (
        "SELECT m.id,r.userid,r.rating FROM movies AS m JOIN "
        "((SELECT * FROM reviewsdata) UNION (SELECT * FROM user_reviews)) AS r "
        "ON m.id = r.movieid"
    )
    if genre is not None:
        # Reject anything that is not an identifier before touching the DB.
        if not re.match(
                r"(?:[A-Za-z_][A-Za-z0-9_]*\.)?[A-Za-z_][A-Za-z0-9_]*\Z",
                genre):
            raise ValueError(
                "genre must be a plain column name, got %r" % (genre,))
        sql += " WHERE " + genre + " = 1"

    conn = projSql.get_sql_connection()
    data = conn.cursor()
    data.execute(sql)

    prefs = {}
    for movieid, userid, rating in data:
        # Ratings are stored as floats regardless of the driver's type.
        prefs.setdefault(userid, {})[movieid] = float(rating)
    return prefs
def getMovieSimilarities(movieid, genre=None):
    """Look up the stored similarities for one movie.

    Selects ``movie_id2`` and ``similarity`` from ``movie_similarities``
    where ``movie_id1`` equals ``movieid``. When ``genre`` is given, the
    rows are joined to ``movies`` on ``movie_id2`` and filtered to those
    where that column equals 1.

    movieid -- value matched against ``movie_id1``.
    genre   -- optional column name. It becomes part of the SQL text, so it
               must be a plain identifier (``col`` or ``alias.col``).

    Returns a dict ``{movie_id2: similarity}``.
    Raises ValueError if ``genre`` is not a valid identifier.
    """
    import re

    # NOTE(review): the %s placeholder assumes the paramstyle of the MySQL
    # drivers -- confirm against projSql.
    if genre is None:
        sql = ("SELECT movie_id2, similarity FROM movie_similarities "
               "WHERE movie_id1 = %s")
    else:
        if not re.match(
                r"(?:[A-Za-z_][A-Za-z0-9_]*\.)?[A-Za-z_][A-Za-z0-9_]*\Z",
                genre):
            raise ValueError(
                "genre must be a plain column name, got %r" % (genre,))
        sql = ("SELECT movie_id2, similarity FROM movie_similarities as s "
               "JOIN movies as m on s.movie_id2 = m.id "
               "WHERE movie_id1 = %s AND " + genre + "=1")

    conn = projSql.get_sql_connection()
    data = conn.cursor()
    # Let the driver bind the movie id rather than concatenating it.
    data.execute(sql, (movieid,))

    movie_sim = {}
    for movie_id2, similarity in data:
        movie_sim[movie_id2] = similarity
    return movie_sim
def calculateSimilarItems(prefs):
    """Compute the top matches for every key of ``prefs`` and store them.

    For each key, topMatches() is called with ``sim_distance``; every
    returned item is written to ``movie_similarities`` with REPLACE INTO,
    using ``item[1]`` as ``movie_id2`` and ``item[0]`` as ``similarity``.

    prefs -- dict whose keys are used as ``movie_id1`` and which is passed
             unchanged to topMatches().

    Returns a dict mapping each key to the list returned by topMatches().
    """
    result = {}
    conn = projSql.get_sql_connection()
    data = conn.cursor()

    # Values are bound by the driver. This avoids SQL built from strings and
    # the precision that str(float) drops on Python 2.
    # NOTE(review): assumes the %s paramstyle of the MySQL drivers -- confirm.
    sql = ("REPLACE INTO movie_similarities(movie_id1,movie_id2,similarity) "
           "VALUES (%s, %s, %s)")

    total = len(prefs)
    for c, movie_id1 in enumerate(prefs, 1):
        # Status updates for large datasets.
        if c % 100 == 0:
            print("%d / %d" % (c, total))

        # Find the most similar items to this one.
        scores = topMatches(prefs, movie_id1, similarity=sim_distance)
        result[movie_id1] = scores
        for item in scores:
            data.execute(sql, (movie_id1, item[1], item[0]))

    # Persist the writes; a no-op when the connection already autocommits.
    conn.commit()
    # The mapping was built but previously discarded; hand it to the caller.
    return result
def calculateSimilarItems(prefs):
    """Build and persist the item-to-item similarity table.

    Every key of ``prefs`` is scored with topMatches() using
    ``sim_distance``. Each resulting item is stored in
    ``movie_similarities`` via REPLACE INTO, with ``item[1]`` as
    ``movie_id2`` and ``item[0]`` as ``similarity``.

    prefs -- dict whose keys become ``movie_id1``; it is handed to
             topMatches() as-is.

    Returns a dict mapping each key of ``prefs`` to its topMatches() result.
    """
    result = {}
    conn = projSql.get_sql_connection()
    data = conn.cursor()

    # One parameterised statement reused for every row: no string-built SQL
    # and no float precision lost through str() on Python 2.
    # NOTE(review): assumes the %s paramstyle of the MySQL drivers -- confirm.
    sql = ("REPLACE INTO movie_similarities(movie_id1,movie_id2,similarity) "
           "VALUES (%s, %s, %s)")

    total = len(prefs)
    for c, movie_id1 in enumerate(prefs, 1):
        # Progress report every 100 items.
        if c % 100 == 0:
            print("%d / %d" % (c, total))

        scores = topMatches(prefs, movie_id1, similarity=sim_distance)
        result[movie_id1] = scores
        for item in scores:
            data.execute(sql, (movie_id1, item[1], item[0]))

    # Commit so the rows survive; harmless if the connection autocommits.
    conn.commit()
    # Return the mapping that the loop accumulates instead of dropping it.
    return result
import json
import projSql
import time

# Export every rating to user_ratings.json as {userid: {movieid: rating}}
# and report how long the export took.

# time.clock() was removed in Python 3.8. Use perf_counter() where it exists
# and fall back to clock() on interpreters that predate it.
_timer = getattr(time, "perf_counter", None) or time.clock

start_time = _timer()

conn = projSql.get_sql_connection()
data = conn.cursor()
# Ratings come from both review tables, merged with UNION.
sql = (
    "SELECT m.id,r.userid,r.rating FROM movies AS m JOIN "
    "((SELECT * FROM reviewsdata) UNION (SELECT * FROM user_reviews)) AS r "
    "ON m.id = r.movieid"
)
data.execute(sql)

prefs = {}
for movieid, userid, rating in data:
    # float() keeps the value JSON-serialisable whatever the driver returns.
    prefs.setdefault(userid, {})[movieid] = float(rating)

with open('user_ratings.json', 'w') as outfile:
    json.dump(prefs, outfile)

print("Time taken: " + str(_timer() - start_time) + " seconds.\n")
import json
import projSql
import time

# Script: read all ratings from the database, group them per user and write
# the result to user_ratings.json, then print the elapsed time.

# time.clock() no longer exists on Python 3.8+, so pick perf_counter() when
# available and only fall back to clock() on older interpreters.
_timer = getattr(time, "perf_counter", None) or time.clock

start_time = _timer()

conn = projSql.get_sql_connection()
data = conn.cursor()
# movies joined to the UNION of the two review tables.
sql = (
    "SELECT m.id,r.userid,r.rating FROM movies AS m JOIN "
    "((SELECT * FROM reviewsdata) UNION (SELECT * FROM user_reviews)) AS r "
    "ON m.id = r.movieid"
)
data.execute(sql)

prefs = {}
for movieid, userid, rating in data:
    # Store plain floats so json.dump can serialise every rating.
    prefs.setdefault(userid, {})[movieid] = float(rating)

with open('user_ratings.json', 'w') as outfile:
    json.dump(prefs, outfile)

print("Time taken: " + str(_timer() - start_time) + " seconds.\n")