def similarity(self, other):
    """Return the Pearson correlation between this user and ``other``.

    Every movie rated by both users contributes one
    ``(self score, other score)`` pair; the list of pairs is handed to
    ``pearson``.

    ``other`` must expose ``ratings``, an iterable of objects carrying
    ``movie_id`` and ``score``.

    Returns the value of ``pearson(pairs)``, or ``0.0`` when the two users
    have rated no movie in common.
    """
    # Index this user's rating objects by movie id for constant-time lookup.
    user_dict = {r.movie_id: r for r in self.ratings}

    paired_ratings = []
    for r in other.ratings:
        u_r = user_dict.get(r.movie_id)
        if u_r is not None:
            paired_ratings.append((u_r.score, r.score))

    if not paired_ratings:
        return 0.0
    # Computed once; the earlier version evaluated it twice, once only to
    # print it as debug output.
    return pearson(paired_ratings)
def get_user_recs(self):
    """Recommend restaurants liked by the users most similar to ``self``.

    Steps:
      1. Pair each rating in the table with ``self``'s rating of the same
         restaurant, grouped by the user who made the rating.
      2. Score every such user with ``pearson`` over their pairs and keep
         the top half (plus one) of the ranking.
      3. Collect the restaurants those users rated 4 or higher that
         ``self`` has not rated.

    Returns a list of ``Restaurant`` objects without duplicates, in no
    particular order.
    """
    UserRestaurants = db.aliased(Rating)

    # One row per (some user's rating, self's rating of the same restaurant).
    # The earlier query additionally joined a third ``Rating`` alias on
    # user_id, which repeated each pair once for every rating that user had
    # made -- that join was the source of the duplicated pairs and none of
    # its columns were used.
    query = (db.session.query(Rating.user_id,
                              Rating.user_rating,
                              UserRestaurants.user_rating)
             .join(UserRestaurants,
                   UserRestaurants.restaurant_id == Rating.restaurant_id)
             .filter(UserRestaurants.user_id == self.user_id))

    paired_ratings = defaultdict(list)
    for rating_user_id, their_rating, own_rating in query:
        paired_ratings[rating_user_id].append((own_rating, their_rating))

    # NOTE(review): the query does not exclude self.user_id, so ``self`` is
    # ranked alongside everyone else -- confirm whether that is intended.
    similarities = [(pearson(pairs), user)
                    for user, pairs in paired_ratings.items()]
    similarities.sort(reverse=True)

    # Floor division keeps the slice bound an integer on Python 3 as well.
    sim_users = similarities[:len(similarities) // 2 + 1]
    sim_users_id = [user for _, user in sim_users]

    self_restaurants = set(r.restaurant_id for r in self.ratings)
    recommendations = set()
    for similar_id in sim_users_id:
        rated = (db.session.query(Rating, Restaurant)
                 .join(Restaurant,
                       Restaurant.restaurant_id == Rating.restaurant_id)
                 .filter(Rating.user_id == similar_id))
        for rating, restaurant in rated:
            if (rating.restaurant_id not in self_restaurants
                    and rating.user_rating >= 4):
                recommendations.add(restaurant)

    return list(recommendations)
def test_pearson(self):
    """Check ``pearson`` against hand-computed correlations.

    Covers a perfect positive correlation, a negative one, a partial
    positive one, and a series whose first component never varies.
    """
    pairs_1 = [(1.0, 1.0), (3.0, 3.0), (4.0, 4.0), (2.0, 2.0)]
    pairs_2 = [(1.0, 5.0), (3.0, 3.0), (1.0, 4.0), (3.0, 2.0)]
    pairs_3 = [(1.0, 2.0), (3.0, 3.0), (1.0, 1.0), (3.0, 2.0)]
    pairs_4 = [(0.0, 5.0), (0.0, 2.0), (0.0, 1.0), (0.0, 3.0)]

    # Floating-point results are compared approximately: the expected
    # values below are -2/sqrt(5) and 1/sqrt(2), and an exact-equality
    # check would break on any harmless reordering of the arithmetic.
    self.assertAlmostEqual(pearson(pairs_1), 1.0)
    self.assertAlmostEqual(pearson(pairs_2), -0.8944271909999159)
    self.assertAlmostEqual(pearson(pairs_3), 0.7071067811865475)
    # Zero variance in the first component: the function is expected to
    # return exactly 0 rather than divide by zero.
    self.assertEqual(pearson(pairs_4), 0)
def similarity(self, other):
    """Find the Pearson correlation between self and another user.

    Builds a ``(self rating, other rating)`` pair for every movie both
    users have rated and passes the pairs to ``correlation.pearson``.

    ``other`` must expose ``ratings``, an iterable of objects carrying
    ``movie_id`` and ``rating``.

    Returns ``0.0`` when the users share no rated movie.
    """
    u_ratings = {rating.movie_id: rating.rating for rating in self.ratings}

    # Membership is tested on the dict itself: ``.keys()`` builds a list on
    # Python 2, which turned each lookup into a linear scan.
    pairs = [(u_ratings[r.movie_id], r.rating)
             for r in other.ratings
             if r.movie_id in u_ratings]

    if pairs:
        return correlation.pearson(pairs)
    return 0.0
def similarity(self, other):
    """Return the Pearson correlation between this user and ``other``.

    Each movie rated by both users yields a ``(self score, other score)``
    pair. Returns ``pearson`` of those pairs, or ``0.0`` if there are none.
    """
    # Rating objects of this user, keyed by the movie they refer to.
    own_by_movie = dict((r.movie_id, r) for r in self.ratings)

    paired_ratings = []
    for theirs in other.ratings:
        mine = own_by_movie.get(theirs.movie_id)
        if not mine:
            # This user has not rated that movie.
            continue
        paired_ratings.append((mine.score, theirs.score))

    return pearson(paired_ratings) if paired_ratings else 0.0
def similarity(self, other):
    """Return the similarity of this user and ``other``, reusing a stored value.

    ``Similarity.search(self.id, other.id)`` is consulted first; when it
    returns a truthy record, that record's ``similarity`` is returned
    unchanged. Otherwise the value is computed with ``correlation.pearson``
    over ``(self rating, other rating)`` pairs for the movies both users
    rated (``0.0`` when there are none), stored through ``Similarity.new``
    and returned.
    """
    previous_similarity = Similarity.search(self.id, other.id)
    if previous_similarity:
        return previous_similarity.similarity

    # From here on no stored record exists, so the earlier "update the
    # existing record" branches could never run and have been removed.
    u_ratings = {}
    for rating in self.ratings:
        u_ratings[rating.movie_id] = rating.rating

    paired_rating = []
    for r in other.ratings:
        u_rating = u_ratings.get(r.movie_id)
        # ``is not None`` rather than truthiness: a stored rating of 0 is
        # still a rating. NOTE(review): confirm 0 is not used as "unrated".
        if u_rating is not None:
            paired_rating.append((u_rating, r.rating))

    result = correlation.pearson(paired_rating) if paired_rating else 0.0
    Similarity.new(self.id, other.id, result)
    return result
def predict(movie_id):
    """Print a predicted star rating for ``movie_id``.

    For every movie id returned by ``movie_ids(0)`` the ratings dict from
    ``make_target_ratings`` is compared with the target movie's via
    ``pearson``. The five highest similarities are combined into a weighted
    mean of the values stored under key ``0`` of each ratings dict, and the
    result is printed.

    Nothing is returned. If the weights sum to zero (including when there
    is nothing to compare against) a "no prediction" line is printed
    instead of dividing by zero.
    """
    target_movie = Movies.get(movie_id)
    target_ratings = make_target_ratings(movie_id)

    similarities = []
    for rated_id in movie_ids(0):
        ratings = make_target_ratings(rated_id)
        # ``ratings`` is a dictionary, not a list: this reads key 0.
        our_rating = ratings[0]
        similarities.append((pearson(target_ratings, ratings), our_rating))

    top_five = sorted(similarities, reverse=True)[:5]

    # Use a weighted mean rather than a strict top similarity.
    num = 0.0
    den = 0.0
    for sim, m in top_five:
        num += float(sim) * m
        den += sim

    if den == 0:
        print("No prediction available for movie %d: %s"
              % (movie_id, target_movie['title']))
        return

    rating = num / den
    print("Best guess for movie %d: %s is %.2f stars"
          % (movie_id, target_movie['title'], rating))
def predict(movie_id):
    """Print a predicted star rating for ``movie_id``.

    Each movie id from ``get_my_movies()`` is compared with the target
    movie by running ``pearson`` on their ratings dicts. The five highest
    similarities weight the corresponding ``get_rating(movie, 0)`` values
    into a mean, which is printed.

    Nothing is returned. If the weights sum to zero a "no prediction" line
    is printed instead of dividing by zero.
    """
    target_movie = get_movie(movie_id)
    target_film_ratings = convert_ratings_to_dict(
        get_ratings(movie_id=movie_id))

    similarities = []
    for base_movie in get_my_movies():
        base_movie_ratings = convert_ratings_to_dict(
            get_ratings(movie_id=base_movie))
        similarities.append((pearson(target_film_ratings, base_movie_ratings),
                             get_rating(base_movie, 0)))

    # Highest similarity first, then keep five. The earlier version left
    # ``top_five`` undefined (its assignment was commented out), so the
    # loop below raised NameError.
    top_five = sorted(similarities, reverse=True)[:5]

    # Use a weighted mean rather than a strict top similarity.
    num = 0.0
    den = 0.0
    for sim, m in top_five:
        num += float(sim) * m
        den += sim

    if den == 0:
        print("No prediction available for movie %d: %s"
              % (movie_id, target_movie['title']))
        return

    rating = num / den
    print("Best guess for movie %d: %s is %.2f stars"
          % (movie_id, target_movie['title'], rating))
def similarity(self, other_movie):
    """Return the Pearson correlation between this movie and ``other_movie``.

    Each user who rated both movies contributes one
    ``(rating of self, rating of other_movie)`` pair; the pairs are passed
    to ``correlation.pearson``.

    Returns ``0.0`` when no user rated both movies.
    """
    # user_id -> that user's rating of this movie.
    movie_ratings = {}
    for rating in self.ratings:
        movie_ratings[rating.user_id] = rating.rating

    paired_ratings = []
    for rating in other_movie.ratings:
        movie_rating = movie_ratings.get(rating.user_id)
        # Compare with None instead of relying on truthiness, so a stored
        # rating of 0 is not mistaken for "this user did not rate self".
        # NOTE(review): confirm 0 is not used as an "unrated" marker.
        if movie_rating is not None:
            paired_ratings.append((movie_rating, rating.rating))

    if paired_ratings:
        return correlation.pearson(paired_ratings)
    return 0.0
def similarity(self, other):
    """Return the Pearson correlation of this user's scores with ``other``'s.

    A lookup table ``{movie_id: <rating object>}`` is built from
    ``self.ratings``; every rating of ``other`` that hits the table adds a
    ``(self score, other score)`` pair. With no pairs the result is ``0.0``.
    """
    by_movie = {obj.movie_id: obj for obj in self.ratings}

    # (own rating object or None, other's rating object) for each rating.
    candidates = [(by_movie.get(obj.movie_id), obj) for obj in other.ratings]
    paired_ratings = [(own.score, theirs.score)
                      for own, theirs in candidates
                      if own]

    if not paired_ratings:
        return 0.0
    return pearson(paired_ratings)
def similarity(self, other):
    """Return the Pearson correlation between this user and a second user.

    Scores are paired as ``(self score, other score)`` for every movie both
    users rated. Without any such movie, ``0.0`` is returned.
    """
    seen = {}
    for own in self.ratings:
        seen[own.movie_id] = own

    def _pairs():
        # Yield one score pair per movie that both users rated.
        for theirs in other.ratings:
            own = seen.get(theirs.movie_id)
            if own:
                yield (own.score, theirs.score)

    paired_ratings = list(_pairs())
    if paired_ratings:
        return correlation.pearson(paired_ratings)
    return 0.0
def assess_similarity(self, other):
    """Return the Pearson correlation between ``self`` and ``other``.

    Both arguments are objects exposing ``ratings``. For each movie rated
    by both, the tuple ``(self score, other score)`` is collected and the
    collection is passed to ``correlation.pearson``. Returns ``0.0`` when
    nothing was collected.
    """
    # movie_id -> rating object, for every rating made by ``self``.
    own_ratings = {r.movie_id: r for r in self.ratings}

    paired_ratings = []
    for theirs in other.ratings:
        own = own_ratings.get(theirs.movie_id)
        if own is None:
            # ``self`` never rated this movie.
            continue
        paired_ratings.append((own.score, theirs.score))

    return correlation.pearson(paired_ratings) if paired_ratings else 0.0
def similarity(self, other):
    """Return the Pearson correlation for this user compared to ``other``.

    Tuples are ordered ``(current user's score, other user's score)``, one
    per movie rated by both. If no movie matches, ``0.0`` is returned.
    """
    current = dict((rating.movie_id, rating) for rating in self.ratings)

    paired_ratings = []
    for rating in other.ratings:
        match = current.get(rating.movie_id)
        if match:
            paired_ratings += [(match.score, rating.score)]

    if not paired_ratings:
        # No movie in common.
        return 0.0
    return correlation.pearson(paired_ratings)
def similarity(self, movie_scores):
    """Find the similarity between this user and another user's scores.

    This user's ``(movie_id, score)`` rows are read from the ``ratings``
    table with a parameterised query. ``movie_scores`` is a mapping of
    ``movie_id -> score`` for the other user.

    Returns ``correlation.pearson`` over ``(own score, other score)`` pairs
    for the movies present on both sides, or ``0.0`` when there are none.
    """
    ratings = db.session.execute(
        """\
        SELECT movie_id, score
        FROM ratings
        WHERE user_id = :curr_user
        ORDER BY movie_id\
        """,
        {'curr_user': self.user_id}).fetchall()

    u_ratings = {}
    for movie_id, score in ratings:
        u_ratings[movie_id] = score

    paired_ratings = []
    for movie_id, o_score in movie_scores.items():
        u_rating = u_ratings.get(movie_id)
        # Explicit None check: a stored score of 0 must not be treated as
        # "not rated". NOTE(review): confirm 0 is not an "unrated" marker.
        if u_rating is not None:
            paired_ratings.append((u_rating, o_score))

    if paired_ratings:
        return correlation.pearson(paired_ratings)
    return 0.0
def similarity(self, other):
    """Return the Pearson correlation between ``self`` and ``other``.

    Works for both kinds of object that carry ``ratings``:

    * when ``self`` is a ``User``, ratings are matched on ``movie_id``
      (both users rated the same movie);
    * when ``self`` is a ``Movie``, ratings are matched on ``user_id``
      (the same user rated both movies).

    Matched ratings are paired as ``(self rating, other rating)`` and
    passed to ``correlation.pearson``. Returns ``0.0`` without matches.

    Raises ``TypeError`` when ``self`` is neither a ``User`` nor a
    ``Movie``; previously that case ended in a ``NameError``.
    """
    # Decide once which attribute links two ratings together, instead of
    # re-checking the type of ``self`` on every loop iteration.
    if isinstance(self, User):
        key_attr = 'movie_id'
    elif isinstance(self, Movie):
        key_attr = 'user_id'
    else:
        raise TypeError("similarity() requires a User or a Movie, got %s"
                        % type(self).__name__)

    own_ratings = {}
    for one_rating in self.ratings:
        own_ratings[getattr(one_rating, key_attr)] = one_rating

    paired_ratings = []
    for a_rating in other.ratings:
        rating_buddy = own_ratings.get(getattr(a_rating, key_attr))
        if rating_buddy is not None:
            paired_ratings.append((rating_buddy.rating, a_rating.rating))

    if paired_ratings:
        return correlation.pearson(paired_ratings)
    return 0.0
def predict_rating(self, movie):
    """Predict this user's rating of ``movie``.

    A three-way self-join on ``Rating`` yields, for every other rating that
    shares a movie with one of this user's ratings, the other user's id and
    score, this user's score for the same movie, and the other user's score
    for ``movie``. Users whose ``correlation.pearson`` value is positive
    contribute their score for ``movie``, weighted by that value.

    Returns the weighted mean, or ``None`` when no user has a positive
    similarity.
    """
    UserMovies = db.aliased(Rating)
    MovieUsers = db.aliased(Rating)

    query = db.session.query(
        Rating.user_id, Rating.score, UserMovies.score, MovieUsers.score)
    query = query.join(UserMovies, UserMovies.movie_id == Rating.movie_id)
    query = query.join(MovieUsers, Rating.user_id == MovieUsers.user_id)
    query = query.filter(UserMovies.user_id == self.user_id)
    query = query.filter(MovieUsers.movie_id == movie.movie_id)

    pairs_by_user = defaultdict(list)
    target_scores = {}
    for other_id, other_score, own_score, target_score in query:
        pairs_by_user[other_id].append((own_score, other_score))
        target_scores[other_id] = target_score

    weighted = []
    for other_id, target_score in target_scores.items():
        sim = correlation.pearson(pairs_by_user[other_id])
        if sim > 0:
            weighted.append((sim, target_score))

    if not weighted:
        return None

    numerator = sum(target_score * sim for sim, target_score in weighted)
    denominator = sum(sim for sim, _ in weighted)
    return numerator / denominator
def calculate_max_pearson_score(score_corr):
    """Return the highest Pearson score in ``score_corr`` and its user.

    ``score_corr`` maps ``user_id`` to that user's paired ratings; each
    value is passed to ``correlation.pearson``.

    Returns a ``(max_score, user_id)`` tuple. When several users share the
    maximum, the one iterated last is reported, which is what the earlier
    score-keyed dictionary produced.

    Raises ``ValueError`` when ``score_corr`` is empty.
    """
    if not score_corr:
        raise ValueError("score_corr must contain at least one user")

    best = None
    for user_id, all_paired_ratings in score_corr.items():
        pearson_score = correlation.pearson(all_paired_ratings)
        # ``>=`` so a later user with an equal score replaces an earlier one.
        if best is None or pearson_score >= best[0]:
            best = (pearson_score, user_id)
    return best
def calc_pearson_corr(wanted_user, other_users):
    """Correlate ``wanted_user`` with each user in ``other_users``.

    For every other user, ``make_paired_ratings`` supplies the shared
    rating pairs; users with no pairs are skipped.

    Returns a list of ``(user_id, pearson value)`` tuples in the order the
    users were given.
    """
    results = []
    for candidate in other_users:
        shared = make_paired_ratings(wanted_user, candidate)
        if not shared:
            continue
        score = pearson(shared)
        results.append((candidate.user_id, score))
    return results
def similarity(self, other_user):
    """Return the Pearson correlation between this user and ``other_user``.

    Ratings are paired as ``(self rating, other rating)`` for every movie
    both users rated and handed to ``correlation.pearson``.

    Returns ``0.0`` when the users share no rated movie.
    """
    user_dict = {}
    for r in self.ratings:
        user_dict[r.movie_id] = r.rating

    pairs = []
    for r in other_user.ratings:
        own_rating = user_dict.get(r.movie_id)
        # One lookup, and an explicit None check so that a stored rating of
        # 0 still counts as rated.
        # NOTE(review): confirm 0 is not used as an "unrated" marker.
        if own_rating is not None:
            pairs.append((own_rating, r.rating))

    if pairs:
        return correlation.pearson(pairs)
    return 0.0
def similarity(self, u_ratings, other):
    """Return the Pearson correlation between prepared ratings and ``other``.

    ``u_ratings`` maps ``movie_id`` to a rating object; ``other`` exposes
    ``ratings``. Each movie found on both sides yields
    ``(u_ratings rating, other rating)``. Returns ``0.0`` with no matches.
    """
    matches = ((u_ratings.get(theirs.movie_id), theirs)
               for theirs in other.ratings)
    paired_ratings = [(own.rating, theirs.rating)
                      for own, theirs in matches
                      if own]

    if not paired_ratings:
        return 0.0
    return correlation.pearson(paired_ratings)
def similarity(user1, user2):
    """Return the Pearson correlation between two users' ratings.

    Pairs are ordered ``(user2 rating, user1 rating)``, one per movie both
    users rated. Returns ``0.0`` if they have no movie in common.
    """
    first_by_movie = {entry.movie_id: entry for entry in user1.ratings}

    pair_list = []
    for entry in user2.ratings:
        counterpart = first_by_movie.get(entry.movie_id)
        if not counterpart:
            continue
        pair_list.append((entry.rating, counterpart.rating))

    if not pair_list:
        return 0.0
    return correlation.pearson(pair_list)
def similarity(self, other_user):
    """Return the Pearson correlation between this user and ``other_user``.

    For every movie both users rated, the pair
    ``(other user's movie_rating, own movie_rating)`` is collected; the
    pairs go to ``correlation.pearson``.

    Returns ``0.0`` when no movie was rated by both.
    """
    own_ratings = {r.movie_id: r.movie_rating for r in self.ratings}

    # Plain membership test and direct indexing replace the earlier
    # ``(x in d) == True`` comparison and the redundant ``d.get``.
    rating_pairs = [(theirs.movie_rating, own_ratings[theirs.movie_id])
                    for theirs in other_user.ratings
                    if theirs.movie_id in own_ratings]

    if rating_pairs:
        return correlation.pearson(rating_pairs)
    return 0.0
def similarity(self, movie2):
    """Return the Pearson correlation between this movie and ``movie2``.

    Ratings are matched on ``user_id``: each user who rated both movies
    adds ``(rating of movie2, rating of self)``. Returns ``0.0`` when no
    user rated both.
    """
    by_user = dict((rating.user_id, rating) for rating in self.ratings)

    pair_list = []
    for rating in movie2.ratings:
        own = by_user.get(rating.user_id)
        if own:
            pair_list.append((rating.rating, own.rating))

    return correlation.pearson(pair_list) if pair_list else 0.0
def similarity(self, movie2):
    """Return the Pearson correlation of this movie's ratings with ``movie2``'s.

    For every user appearing in both movies' ratings a tuple
    ``(movie2 rating, self rating)`` is built. An empty result gives ``0.0``.
    """
    raters = {}
    for entry in self.ratings:
        raters[entry.user_id] = entry

    looked_up = [(entry, raters.get(entry.user_id)) for entry in movie2.ratings]
    pair_list = [(theirs.rating, own.rating)
                 for theirs, own in looked_up
                 if own]

    if not pair_list:
        return 0.0
    return correlation.pearson(pair_list)
def similarity(user1, user2):
    """Return the Pearson correlation between ``user1`` and ``user2``.

    One ``(user2 rating, user1 rating)`` tuple is produced for each movie
    that both users rated. Returns ``0.0`` when there is no such movie.
    """
    lookup = {}
    for own in user1.ratings:
        lookup[own.movie_id] = own

    def _shared():
        # Walk user2's ratings and emit a tuple whenever user1 rated the
        # same movie.
        for theirs in user2.ratings:
            own = lookup.get(theirs.movie_id)
            if own:
                yield (theirs.rating, own.rating)

    pair_list = list(_shared())
    if pair_list:
        return correlation.pearson(pair_list)
    return 0.0
def similarity(self, user2):
    """Return the Pearson correlation between this user and ``user2``.

    Pairs are ``(self rating, user2 rating)`` for each movie rated by
    both. With no shared movie the result is ``0.0``.
    """
    own = {first.movie_id: first for first in self.ratings}

    paired_ratings = []
    for second in user2.ratings:
        first = own.get(second.movie_id)
        if not first:
            continue
        paired_ratings.append((first.rating, second.rating))

    if paired_ratings:
        return correlation.pearson(paired_ratings)
    return 0.0
def record_correlation(s, t, overlap=300):
    """Return the correlation between the monthly anomalies of two records.

    Both records must have the same ``first_year`` (asserted). Copies of
    their ``series`` are passed to ``series.anomalize`` -- the return value
    is ignored, so it presumably modifies the list in place (confirm) --
    and only positions where both values satisfy ``valid`` are kept.

    Returns ``None`` when fewer than ``overlap`` positions are common to
    both records; otherwise the result of ``correlation.pearson`` called
    with the two aligned sequences as separate arguments.
    """
    assert s.first_year == t.first_year

    left = list(s.series)
    right = list(t.series)
    series.anomalize(left)
    series.anomalize(right)

    common = []
    for u, v in zip(left, right):
        if valid(u) and valid(v):
            common.append((u, v))

    if len(common) >= overlap:
        # Unzip the pairs back into two parallel sequences.
        return correlation.pearson(*zip(*common))
    return None
def similarity(self, user2):
    """Return the Pearson correlation between this user and ``user2``.

    Tuples are ordered ``(user2 rating, self rating)`` and cover the
    movies both users rated. Returns ``0.0`` when there are none.
    """
    own_by_movie = dict((r.movie_id, r.rating) for r in self.ratings)

    rating_pairs = [(theirs.rating, own_by_movie[theirs.movie_id])
                    for theirs in user2.ratings
                    if theirs.movie_id in own_by_movie]

    if not rating_pairs:
        return 0.0
    return correlation.pearson(rating_pairs)
def similarity(self, other):
    """Return the Pearson correlation between this user and ``other``.

    Collects ``(self rating, other rating)`` for each movie rated by both
    and passes the collection to ``correlation.pearson``; ``0.0`` if empty.
    """
    mine = {}
    for entry in self.ratings:
        mine[entry.movie_id] = entry

    paired_ratings = []
    for theirs in other.ratings:
        hit = mine.get(theirs.movie_id)
        if hit:
            paired_ratings += [(hit.rating, theirs.rating)]

    return correlation.pearson(paired_ratings) if paired_ratings else 0.0
def similarity(self, other):
    """Return the similarity coefficient of this user and ``other``.

    For each movie both users rated, ``(self rating, other rating)`` is
    recorded; ``correlation.pearson`` is applied to the recorded tuples.
    Returns ``0.0`` when the users have no rated movie in common.
    """
    rated = {own.movie_id: own for own in self.ratings}

    candidates = ((rated.get(theirs.movie_id), theirs)
                  for theirs in other.ratings)
    paired_ratings = [(own.rating, theirs.rating)
                      for own, theirs in candidates
                      if own]

    if paired_ratings:
        return correlation.pearson(paired_ratings)
    return 0.0
def similarity(self, other):
    """Return the Pearson correlation between this user and ``other``.

    Ratings are paired as ``(self rating, other rating)`` for each movie
    both users rated and passed to ``correlation.pearson``.

    Returns ``0.0`` when the users share no rated movie.
    """
    u_movies = {rating.movie_id: rating.rating for rating in self.ratings}

    rating_pairs = []
    for rating in other.ratings:
        u_rating = u_movies.get(rating.movie_id)
        # ``is not None`` so that a stored rating of 0 is kept; a bare
        # truthiness test silently dropped it.
        # NOTE(review): confirm 0 is not used as an "unrated" marker.
        if u_rating is not None:
            rating_pairs.append((u_rating, rating.rating))

    if rating_pairs:
        return correlation.pearson(rating_pairs)
    return 0.0
def similarity(self, other_user):
    """Determine how similar two users' tastes in movies are.

    ``User.generate_dict_of_ratings`` is called for each user; the results
    are used as ``movie_id -> score`` mappings. Scores for movies present
    in both mappings are paired as ``(own score, other score)`` and passed
    to ``correlation.pearson``.

    Returns ``0.0`` when the users share no movie.
    """
    my_ratings = User.generate_dict_of_ratings(self)
    other_ratings = User.generate_dict_of_ratings(other_user)

    # ``items()`` works on both Python 2 and 3; ``iteritems()`` does not.
    pairs = [(my_score, other_ratings[movie_id])
             for movie_id, my_score in my_ratings.items()
             if movie_id in other_ratings]

    if pairs:
        return correlation.pearson(pairs)
    # A float, for consistency with the correlation returned above (the
    # earlier version returned the int 0 here).
    return 0.0
def similarity(user1, user2):
    """Return the Pearson correlation between two users' item ratings.

    Each user exposes ``data``, an iterable of objects with ``item_id``
    and ``rating``. Items present for both users yield
    ``(user1 rating, user2 rating)``. Returns ``0.0`` with no shared item.
    """
    first_items = dict((entry.item_id, entry) for entry in user1.data)

    paired_ratings = []
    for entry in user2.data:
        match = first_items.get(entry.item_id)
        if not match:
            continue
        paired_ratings.append((match.rating, entry.rating))

    if not paired_ratings:
        return 0.0
    return correlation.pearson(paired_ratings)
def similarity(self, other):
    """Return the Pearson correlation between this user and ``other``.

    Overlapping ratings (same ``movie_id`` on both sides) are paired as
    ``(self rating, other rating)``. Returns ``0.0`` without any overlap.
    """
    own = {entry.movie_id: entry for entry in self.ratings}

    paired_ratings = []
    for theirs in other.ratings:
        overlap = own.get(theirs.movie_id)
        if overlap:
            paired_ratings.append((overlap.rating, theirs.rating))

    return pearson(paired_ratings) if paired_ratings else 0.0
def similarity(self, other):
    """Return the Pearson correlation between two rated objects.

    Ratings are matched on ``user_id``; each user found in both rating
    lists contributes ``(self rating, other rating)``. Returns ``0.0``
    when no user appears in both.
    """
    by_user = {}
    for entry in self.ratings:
        by_user[entry.user_id] = entry

    looked_up = [(by_user.get(entry.user_id), entry) for entry in other.ratings]
    paired_ratings = [(own.rating, theirs.rating)
                      for own, theirs in looked_up
                      if own]

    if paired_ratings:
        return pearson(paired_ratings)
    return 0.0
def similarity(self, other):
    """Return the Pearson correlation between this user and ``other``.

    ``(self rating, other rating)`` tuples are gathered for the movies
    both users rated and given to ``correlation.pearson``. With nothing
    gathered the result is ``0.0``.
    """
    index = dict((entry.movie_id, entry) for entry in self.ratings)

    def _gather():
        # One tuple per rating of ``other`` that matches a movie in index.
        for entry in other.ratings:
            own = index.get(entry.movie_id)
            if own:
                yield (own.rating, entry.rating)

    paired_ratings = list(_gather())
    if not paired_ratings:
        return 0.0
    return correlation.pearson(paired_ratings)
def similarity(self, other):
    """Return the Pearson correlation between this user and ``other``.

    Ratings equal to 0 are ignored on both sides. Among the remaining
    ratings, each ``beer_id`` present for both users contributes
    ``(self rating, other rating)``. Returns ``0.0`` when nothing matches.
    """
    own = {entry.beer_id: entry for entry in self.ratings if entry.rating != 0}

    paired_ratings = []
    for theirs in other.ratings:
        if theirs.rating == 0:
            continue
        match = own.get(theirs.beer_id)
        if match:
            paired_ratings.append((match.rating, theirs.rating))

    if not paired_ratings:
        return 0.0
    return correlation.pearson(paired_ratings)
def similarity(self, other):
    """Return the Pearson correlation between this user and ``other``.

    A tuple ``(self rating, other rating)`` is created for every movie
    rated by both users. Returns ``0.0`` when there is no such movie.
    """
    known = {}
    for own in self.ratings:
        known[own.movie_id] = own

    paired_ratings = []
    for theirs in other.ratings:
        own = known.get(theirs.movie_id)
        if not own:
            continue
        paired_ratings += [(own.rating, theirs.rating)]

    if paired_ratings:
        return correlation.pearson(paired_ratings)
    return 0.0
def similarity(user1, user2):
    """Return the Pearson correlation between ``user1`` and ``user2``.

    For movies rated by both users, ``(user1 rating, user2 rating)`` pairs
    are handed to ``correlation.pearson``. Returns ``0.0`` with no pairs.
    """
    first = {entry.movie_id: entry for entry in user1.ratings}

    paired_ratings = [(own.rating, theirs.rating)
                      for own, theirs in ((first.get(e.movie_id), e)
                                          for e in user2.ratings)
                      if own]

    return correlation.pearson(paired_ratings) if paired_ratings else 0.0
def similarity(self, other):
    """Return the Pearson correlation between this user and ``other``.

    Builds ``(self rating, other rating)`` for each commonly rated movie
    and applies ``pearson``; yields ``0.0`` if nothing is in common.
    """
    own_by_movie = dict((own.movie_id, own) for own in self.ratings)

    paired_ratings = []
    for theirs in other.ratings:
        own = own_by_movie.get(theirs.movie_id)
        if own:
            paired_ratings.append((own.rating, theirs.rating))

    if not paired_ratings:
        return 0.0
    return pearson(paired_ratings)
def predict(movie_id):
    """Print a predicted star rating for ``movie_id``.

    The five highest ``(similarity, rating)`` tuples are combined into a
    weighted mean, which is printed. Nothing is returned.

    NOTE(review): this function looks unfinished. ``rated_movies`` and
    ``movie_pairs`` are not defined in the function, ``pearson`` is called
    with two empty dicts, and neither ``ratings`` nor the loop variable is
    used. Those parts are kept as found; only the syntax error (missing
    comma in the tuple), the unbound ``similarities`` when the loop never
    runs, and the division by zero have been fixed.
    """
    ratings = get_ratings(movie_id=movie_id)
    target_movie = get_movie(movie_id)

    similarities = []
    for movie in rated_movies:
        similarities = [(pearson({}, {}), rating)
                        for target_movie_id, rating in movie_pairs]

    top_five = sorted(similarities, reverse=True)[:5]

    # Use a weighted mean rather than a strict top similarity.
    num = 0.0
    den = 0.0
    for sim, m in top_five:
        num += float(sim) * m
        den += sim

    if den == 0:
        print("No prediction available for movie %d: %s"
              % (movie_id, target_movie['title']))
        return

    rating = num / den
    print("Best guess for movie %d: %s is %.2f stars"
          % (movie_id, target_movie['title'], rating))


def parse(line, dispatch):
    """Parse a command line and run the matching command.

    ``dispatch`` maps a command word to a sequence whose first element is
    the function to call and whose remaining elements are one converter
    per argument (for example ``(func, int, float)``).

    The line is split on whitespace. ``error()`` is returned for an empty
    line or unknown command, and ``error(...)`` with a message when the
    argument count is wrong or when converting/calling raises. Otherwise
    the command function's return value is returned.
    """
    tokens = line.split()
    if not tokens:
        return error()

    cmd = tokens[0]
    command = dispatch.get(cmd)
    if not command:
        return error()

    # tokens = command word + arguments; command = function + converters,
    # so matching lengths means one argument per converter.
    if len(tokens) != len(command):
        return error("Invalid number of arguments")

    function = command[0]
    if len(command) == 1:
        return function()

    try:
        typed_arguments = [_type(arg)
                           for _type, arg in zip(command[1:], tokens[1:])]
        return function(*typed_arguments)
    except Exception:
        # Deliberately broad: any failure while converting or running the
        # command is reported to the user rather than propagated.
        traceback.print_exc()
        return error("Invalid argument to %s" % (cmd))
def similarity(self, other):
    """Return the Pearson correlation between this user and ``other``.

    Scores are paired as ``(self score, other score)`` for every movie
    both users rated and passed to ``correlation.pearson``.

    Returns ``0.0`` when the users have no rated movie in common. The
    earlier version fell off the end of the function in that case and
    returned ``None``.
    """
    u_ratings = {}
    for r in self.ratings:
        u_ratings[r.movie_id] = r

    paired_ratings = []
    for r in other.ratings:
        u_r = u_ratings.get(r.movie_id)
        if u_r is not None:
            paired_ratings.append((u_r.score, r.score))

    if paired_ratings:
        return correlation.pearson(paired_ratings)
    return 0.0
def similarity(self, user2):
    """Produce a Pearson coefficient for this user and a second user.

    The coefficient is computed over ``(self rating, user2 rating)``
    tuples, one per movie both users rated. Returns ``0.0`` when the two
    users share no movie.
    """
    own = {entry.movie_id: entry for entry in self.ratings}

    paired_ratings = []
    for theirs in user2.ratings:
        match = own.get(theirs.movie_id)
        if not match:
            continue
        paired_ratings.append((match.rating, theirs.rating))

    return correlation.pearson(paired_ratings) if paired_ratings else 0.0
def similarity(self, other):
    """Return the Pearson correlation between this user and ``other``.

    Every rating of ``other`` whose ``movie_id`` this user also rated
    adds ``(self rating, other rating)``. Returns ``0.0`` for no overlap.
    """
    own = {}
    for entry in self.ratings:
        own[entry.movie_id] = entry

    resolved = [(own.get(entry.movie_id), entry) for entry in other.ratings]
    paired_ratings = [(mine.rating, theirs.rating)
                      for mine, theirs in resolved
                      if mine]

    if paired_ratings:
        return pearson(paired_ratings)
    return 0.0
def predict_score(self, movie_id):
    """Predict this user's score for a movie.

    Every user who rated the movie is compared with ``self`` through
    ``pearson`` over ``(self score, other score)`` pairs for the movies
    both have rated. The user with the largest absolute correlation is
    selected (the first one wins on ties), and that user's score for the
    movie -- ``z`` below -- drives the prediction:

    * positive correlation ``r``: ``r * z``
    * otherwise: ``-r * (6 - z)``

    Returns the prediction rounded to one decimal, or ``None`` when no
    user who rated the movie has a non-zero correlation with ``self``
    (the earlier version raised in that situation).
    """
    # Scores come from the already loaded relationship. The earlier
    # version ran one query per shared rating inside the nested loop.
    own_scores = {rating.movie_id: rating.score for rating in self.ratings}

    movie = Movie.query.get(movie_id)

    best_r = 0
    best_score = None
    for movie_rating in movie.ratings:
        z_user = movie_rating.user
        pairs = [(own_scores[z_rating.movie_id], z_rating.score)
                 for z_rating in z_user.ratings
                 if z_rating.movie_id in own_scores]
        if not pairs:
            continue

        r = pearson(pairs)
        if abs(r) > abs(best_r):
            best_r = r
            # This rating is the chosen user's score for the target movie,
            # so no follow-up query is needed.
            best_score = movie_rating.score

    if best_score is None:
        return None

    if best_r > 0:
        score_prediction = best_r * best_score
    else:
        score_prediction = -best_r * (6 - best_score)
    return round(score_prediction, 1)
def similarity(self, other):
    """Return the Pearson correlation for this user compared to ``other``.

    Scores of commonly rated movies are paired as
    ``(self score, other score)``. No common movie gives ``0.0``.
    """
    own = dict((entry.movie_id, entry) for entry in self.ratings)

    def _common():
        # Emit a score pair for each movie present in both rating lists.
        for theirs in other.ratings:
            mine = own.get(theirs.movie_id)
            if mine:
                yield (mine.score, theirs.score)

    paired_ratings = list(_common())
    return pearson(paired_ratings) if paired_ratings else 0.0
def similarity(self_ratings, o_user):
    """Return the Pearson correlation between a rating list and a user.

    ``self_ratings`` is an iterable of rating objects (``movie_id`` and
    ``score``); ``o_user`` exposes ``ratings`` of the same kind. Scores
    are paired as ``(own score, other score)`` for each movie found on
    both sides and passed to ``pearson``.

    Returns ``0.0`` when no movie is found on both sides.
    """
    our_ratings = {rating.movie_id: rating.score for rating in self_ratings}

    paired_ratings = []
    for other_rating in o_user.ratings:
        our_rating = our_ratings.get(other_rating.movie_id)
        # Explicit None check so that a stored score of 0 still pairs up.
        # NOTE(review): confirm 0 is not used as an "unrated" marker.
        if our_rating is not None:
            paired_ratings.append((our_rating, other_rating.score))

    if paired_ratings:
        return pearson(paired_ratings)
    return 0.0
def similarity(user1, user2):
    """Return the Pearson correlation between ``user1`` and ``user2``.

    For each movie both users rated, ``(user1 score, user2 score)`` is
    collected. Returns ``pearson`` of the collection, or ``0.0`` if empty.
    """
    first = {entry.movie_id: entry for entry in user1.ratings}

    paired_ratings = []
    for second in user2.ratings:
        match = first.get(second.movie_id)
        if match:
            paired_ratings += [(match.score, second.score)]

    if not paired_ratings:
        return 0.0
    return pearson(paired_ratings)
def feed_pairs_to_pearson(self, user_2):
    """Return the Pearson correlation between this user and ``user_2``.

    Both objects expose their rating objects through the ``rating``
    attribute. For each movie rated by both, the tuple
    ``(user_2 score, self score)`` is passed on to ``pearson``. Returns
    ``0.0`` when there is no common movie.
    """
    own = dict((entry.movie_id, entry) for entry in self.rating)

    paired_ratings = [(theirs.score, own[theirs.movie_id].score)
                      for theirs in user_2.rating
                      if theirs.movie_id in own]

    return pearson(paired_ratings) if paired_ratings else 0.0
def similarity(self, other):
    """Return the Pearson correlation between this user and ``other``.

    ``(self score, other score)`` is recorded for every movie rated by
    both users. Returns ``pearson`` of the recorded pairs, or ``0.0`` when
    none were recorded.
    """
    own = {entry.movie_id: entry for entry in self.ratings}

    looked_up = ((own.get(entry.movie_id), entry) for entry in other.ratings)
    paired_ratings = [(mine.score, theirs.score)
                      for mine, theirs in looked_up
                      if mine is not None]

    if not paired_ratings:
        return 0.0
    return pearson(paired_ratings)
def similarity(self, other):
    """Return the Pearson correlation for this user compared to another.

    Scores are paired as ``(self score, other score)`` for each movie the
    two users both rated; ``0.0`` is returned if there is no such movie.
    """
    by_movie = dict((entry.movie_id, entry) for entry in self.ratings)

    paired_ratings = []
    for theirs in other.ratings:
        mine = by_movie.get(theirs.movie_id)
        if mine is None:
            continue
        paired_ratings.append((mine.score, theirs.score))

    return pearson(paired_ratings) if paired_ratings else 0.0
def similarity(self, other):
    """Return the Pearson correlation for this user compared to ``other``.

    One ``(self score, other score)`` tuple per commonly rated movie is
    given to ``correlation.pearson``. Returns ``0.0`` without common movies.
    """
    own = {}
    for entry in self.ratings:
        own[entry.movie_id] = entry

    paired_ratings = []
    for theirs in other.ratings:
        mine = own.get(theirs.movie_id)
        if not mine:
            continue
        paired_ratings.append((mine.score, theirs.score))

    if not paired_ratings:
        return 0.0
    return correlation.pearson(paired_ratings)
def similarity(self, user2):
    """Return the Pearson correlation between this user and ``user2``.

    Scores are paired as ``(self score, user2 score)`` for each movie
    both users rated and passed to ``correlation.pearson``.

    Returns ``0.0`` when the users have no rated movie in common.
    """
    # movie_id -> score given by this user.
    user1_ratings = {rating.movie_id: rating.score for rating in self.ratings}

    common_ratings = []
    for rating in user2.ratings:
        user1_score = user1_ratings.get(rating.movie_id)
        # Explicit None check: a bare truthiness test would discard a
        # stored score of 0 as if the movie had not been rated.
        # NOTE(review): confirm 0 is not used as an "unrated" marker.
        if user1_score is not None:
            common_ratings.append((user1_score, rating.score))

    if common_ratings:
        return correlation.pearson(common_ratings)
    return 0.0
def predict_rating(self, movie):
    """Predict this user's rating of ``movie``.

    A three-way self-join on ``Rating`` produces, for every other rating
    that shares a movie with one of this user's ratings, three entities:
    the other user's rating, this user's rating of the same movie, and the
    other user's rating of ``movie``. Users whose ``correlation.pearson``
    value is positive contribute their rating of ``movie``, weighted by
    that value.

    Returns the weighted mean, or ``None`` when no user has a positive
    similarity.
    """
    UserMovies = db.aliased(Rating)
    MovieUsers = db.aliased(Rating)

    query = (db.session.query(Rating, UserMovies, MovieUsers)
             .join(UserMovies, UserMovies.movie_id == Rating.movie_id)
             .join(MovieUsers, Rating.user_id == MovieUsers.user_id)
             .filter(UserMovies.user_id == self.user_id)
             .filter(MovieUsers.movie_id == movie.movie_id))

    known_ratings = {}
    paired_ratings = defaultdict(list)
    for rating, user_movie, movie_user in query:
        paired_ratings[rating.user_id].append(
            (user_movie.rating, rating.rating))
        known_ratings[rating.user_id] = movie_user.rating

    # Only positively correlated users take part in the prediction.
    similarities = []
    for _id, score in known_ratings.items():
        similarity = correlation.pearson(paired_ratings[_id])
        if similarity > 0:
            similarities.append((similarity, score))

    if not similarities:
        return None

    numerator = sum(score * sim for sim, score in similarities)
    denominator = sum(sim for sim, score in similarities)
    return numerator / denominator
def similarity(self, user2):
    """Compare ``user2``'s ratings to this instance's ratings.

    Returns ``pearson`` over ``(self score, user2 score)`` tuples built
    from the movies both rated, or ``0.0`` when there are no such movies.
    """
    own = {entry.movie_id: entry for entry in self.ratings}

    matched = [(own.get(entry.movie_id), entry) for entry in user2.ratings]
    paired_ratings = [(mine.score, theirs.score)
                      for mine, theirs in matched
                      if mine]

    if paired_ratings:
        return pearson(paired_ratings)
    return 0.0
def similarity(self, other):
    """Return the Pearson correlation for this user compared to ``other``.

    The correlation is computed over ``(self score, other score)`` for
    the movies both users rated; ``0.0`` is returned when there are none.
    """
    own = dict((entry.movie_id, entry) for entry in self.ratings)

    paired_ratings = []
    for theirs in other.ratings:
        mine = own.get(theirs.movie_id)
        if mine:
            paired_ratings += [(mine.score, theirs.score)]

    return pearson(paired_ratings) if paired_ratings else 0.0
def similarity(self, other):
    """Return the Pearson correlation for this user compared to ``other``.

    For each of ``other``'s ratings, this user's rating of the same movie
    is looked up; when present, ``(self score, other score)`` is kept.
    Returns ``0.0`` when nothing was kept.
    """
    own = {entry.movie_id: entry for entry in self.ratings}

    def _kept():
        # Emit the score pair whenever this user rated the same movie.
        for theirs in other.ratings:
            mine = own.get(theirs.movie_id)
            if mine:
                yield (mine.score, theirs.score)

    paired_ratings = list(_kept())
    if not paired_ratings:
        return 0.0
    return correlation.pearson(paired_ratings)