Code Example #1
def get_switching_hybrid_recommendations(movies_to_predict, _all_ratings,
                                         _target_user_id, sim_matrix):
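    # Switching hybrid: for each candidate movie, item-item similarity to each
    # movie the target user has rated is computed on the fly from co-rating
    # users; when that fails (e.g. no overlap), the precomputed sim_matrix is
    # used instead. The prediction is the similarity-weighted average of the
    # target user's ratings over the top 20 most similar rated movies.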

    predictions = []
    _limit_top_neighbours_to = 20

    target_user_ratings = _all_ratings[_all_ratings['userID'] ==
                                       _target_user_id]

    for trailer_id, rating in movies_to_predict:

        top_neighbours = []
        # find most similar movies
        for rated_movie in target_user_ratings['id']:

            intersect = pd.merge(
                _all_ratings[_all_ratings['id'] == rated_movie],
                _all_ratings[_all_ratings['id'] == trailer_id],
                on='userID')
            # print intersect
            try:
                # reshape on the underlying ndarray (Series.reshape is not
                # available in newer pandas versions)
                sim = cosine_similarity(
                    intersect['rating_x'].values.reshape(1, -1),
                    intersect['rating_y'].values.reshape(1, -1))
                top_neighbours.append((rated_movie, sim[0][0]))
            except ValueError:
                try:
                    sim = sim_matrix[rated_movie][trailer_id]
                    top_neighbours.append((rated_movie, sim))
                except KeyError:
                    continue

        top_n = sort_desc(top_neighbours)[:_limit_top_neighbours_to]

        numerator, denominator = (0, 0)
        for neighbour, sim in top_n:
            user_rating = _all_ratings[(_all_ratings['id'] == neighbour) & (
                _all_ratings['userID'] == _target_user_id)]['rating'].iloc[0]
            numerator += sim * user_rating
            denominator += abs(sim)

        try:
            p_ui = numerator / denominator
        except ZeroDivisionError:
            p_ui = 0

        predictions.append((trailer_id, p_ui))

    return sort_desc(predictions)
Code Example #2
def get_tag_based_predictions(user_baseline, movies, all_movies, sim_matrix, _ratings_by_movie, _global_average):
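    # Tag-based prediction: score each candidate movie with predict_user_rating,
    # using its similarity to every other movie in all_movies; negative
    # predictions are clamped to 0.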

    predictions = []

    for movie in movies:
        p_ui = predict_user_rating(user_baseline, movie[0], [(movieJ[1], sim_matrix[movieJ[0]][movie[0]])
                                                             for movieJ in all_movies if movieJ[0] != movie[0]],
                                   _ratings_by_movie, _global_average)
        if p_ui > 0:
            predictions.append((movie[0], p_ui))
        else:
            predictions.append((movie[0], 0.))


    # predictions = [(movie[0], predict_user_rating(user_baseline, movie[0],
    #                                               [(movieJ[1], sim_matrix[movieJ[0]][movie[0]])
    #                                                for movieJ in all_movies if movieJ[0] != movie[0]],
    #                                               _ratings_by_movie, _global_average))
    #                for movie in movies]
    # predictions = [(movie[0], predict_user_rating(user_baseline, movie[0],
    #                                               [(movieJ[1], sim_matrix[movieJ[0]][movie[0]])
    #                                                for movieJ in all_movies],
    #                                               _ratings_by_movie, _global_average))
    #                for movie in movies]

    # print predictions
    return sort_desc(predictions)
Code Example #3
def get_predictions_svd(movies_set, svd_matrix, movies_to_index, user_index,
                        user_average):
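    # SVD prediction: p_ui is the dot product of the user's row in U, the
    # singular values, and the movie's column in V. Movies missing from
    # movies_to_index fall back to the user's average rating.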

    u, s, v = svd_matrix

    predictions = []
    for trailer_id, rating in movies_set:

        try:
            movie_index = movies_to_index[trailer_id]

            p_ui = 0
            for singular_value in range(0, len(s)):
                p_ui += u[user_index][singular_value] * s[singular_value] * v[
                    singular_value][movie_index]
            # p_ui += user_average

            # print p_ui
            # print sum(u[user_index] * s * v[:, movie_index])
            # break
            # p_ui = user_average + np.sum(u[user_index].dot(s.dot(v[:, movie_index])))
        except KeyError:
            p_ui = user_average

        predictions.append((trailer_id, p_ui))

    return sort_desc(predictions)
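The loop in get_predictions_svd accumulates u[user][k] * s[k] * v[k][movie] over
every singular value, which is a single dot product. A minimal sketch of the
equivalent vectorized form follows; the toy ratings matrix and index values are
made-up illustrations, not the project's data.

import numpy as np

# Hypothetical small ratings matrix, factorised with NumPy's SVD.
ratings = np.array([[5., 3., 0., 1.],
                    [4., 0., 0., 1.],
                    [1., 1., 0., 5.]])
u, s, v = np.linalg.svd(ratings, full_matrices=False)

user_index, movie_index = 0, 2

# Loop form, as in get_predictions_svd above.
p_loop = sum(u[user_index][k] * s[k] * v[k][movie_index] for k in range(len(s)))

# Equivalent vectorized form: one weighted dot product over the singular values.
p_vec = (u[user_index] * s).dot(v[:, movie_index])

assert np.isclose(p_loop, p_vec)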
Code Example #4
def get_content_based_user_bof_predictions(_movies_set, _user_avg, _all_ratings, _user_user_sim_matrix, _user_profiles,
                                           _target_user_id):
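    # User-based prediction using the precomputed user-user (bof) similarity
    # matrix: only neighbours who rated the candidate trailer contribute,
    # weighted by similarity and normalised by the sum of absolute similarities.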

    predictions = []

    for trailer_id, rating in _movies_set:

        rating_neighbors = _all_ratings[_all_ratings['id'] == trailer_id]
        rating_neighbors_users = list(rating_neighbors['userID'])

        selected_neighbors = [(user, sim, rating_neighbors[rating_neighbors['userID'] == user]['rating'].iloc[0])
                              for user, sim in _user_user_sim_matrix[_target_user_id] if user in rating_neighbors_users]
        # print "Selected Neighbors"
        # print selected_neighbors
        # break

        try:
            # print sum([abs(sim) for u, sim, r in selected_neighbors])
            # break
            p_ui = _user_avg + sum([sim * (_user_profiles.loc[user]['avg'] - user_rating)
                                    for user, sim, user_rating in selected_neighbors]) / \
                               sum([abs(sim) for u, sim, r in selected_neighbors])
        except ZeroDivisionError:
            p_ui = 0
        predictions.append((trailer_id, p_ui))

    return sort_desc(predictions)
Code Example #5
def get_user_collaborative_predictions_precomputed_similarities(
        movies_to_predict, _user_profiles, _all_ratings, _target_user_id,
        _user_avg, _user_user_sim_matrix):
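    # Mean-centred user-based collaborative filtering with precomputed
    # user-user similarities: keep the top 50 neighbours who rated the
    # candidate trailer and predict
    # p_ui = user_avg + sum(sim * (r_neighbour - neighbour_avg)) / sum(|sim|).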
    global _avg_ratings

    predictions = []
    _limit_top_neighbours_to = 50

    for trailer_id, rating in movies_to_predict:

        # all neighbours
        rating_neighbors = set(
            _all_ratings[_all_ratings['id'] == trailer_id]['userID'])
        # print len(rating_neighbors), "is the current neighbourhood size"
        # break

        # find top neighbours
        top_neighbors = [
            (neighbor, sim)
            for neighbor, sim in _user_user_sim_matrix[_target_user_id]
            if neighbor in rating_neighbors
        ]

        top_n = sort_desc(top_neighbors)[:_limit_top_neighbours_to]

        # print "Top N", top_n

        # predict rating
        numerator, denominator = (0, 0)
        for neighbour, sim in top_n:

            neighbour_rating = _all_ratings[
                (_all_ratings['userID'] == neighbour)
                & (_all_ratings['id'] == trailer_id)]['rating'].iloc[0]
            numerator += sim * (neighbour_rating -
                                _user_profiles.loc[neighbour]['avg'])
            denominator += abs(sim)
        try:
            p_ui = _user_avg + numerator / denominator
        except ZeroDivisionError:
            p_ui = 0

        predictions.append((trailer_id, p_ui))

    return sort_desc(predictions)
Code Example #6
def get_weighted_hybrid_recommendations(predictions, movie_set):
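    # Weighted hybrid: predictions holds the concatenated (trailer_id, p_ui)
    # outputs of the component recommenders; each movie's scores are summed
    # and divided by the number of components (_num_vectors).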

    hybrid_predictions = []
    _num_vectors = 2

    for trailer_id, ratings in movie_set:

        sum_ratings = sum(
            [p_ui for tid, p_ui in predictions if tid == trailer_id])
        hybrid_predictions.append((trailer_id, sum_ratings / _num_vectors))

    return sort_desc(hybrid_predictions)
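A minimal usage sketch for the weighted hybrid above, assuming predictions is
the concatenation of two component recommenders' outputs. The toy scores and
the stand-in sort_desc helper are assumptions made for illustration; the
project's own sort_desc is taken to sort (id, score) pairs by score, highest
first.

# Stand-in for the project's sort_desc helper.
def sort_desc(pairs, desc=True):
    return sorted(pairs, key=lambda pair: pair[1], reverse=desc)

# Hypothetical outputs of two component recommenders for the same three movies.
svd_scores = [(101, 4.2), (102, 3.1), (103, 2.5)]
cf_scores = [(101, 3.8), (102, 3.7), (103, 4.1)]
movie_set = [(101, None), (102, None), (103, None)]

hybrid = get_weighted_hybrid_recommendations(svd_scores + cf_scores, movie_set)
# hybrid is roughly [(101, 4.0), (102, 3.4), (103, 3.3)]: each score is the
# mean of the two component scores, ordered best-first.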
Code Example #7
def get_predictions_linear_regression(movies_set, _deep_features, _user_theta_vectors, userid):
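    # Linear-regression prediction: dot product of the user's learned theta
    # vector with the movie's deep-feature vector, with a bias term of 1
    # prepended via np.insert.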

    # predictions = []
    #
    # for trailer_id, rating in movies_set:
    #
    #     p_ui = _user_theta_vectors[userid].dot(np.insert(_deep_features[trailer_id], 0, 1))
    #     predictions.append((trailer_id, p_ui))

    predictions = [(trailer_id, _user_theta_vectors[userid].dot(np.insert(_deep_features[trailer_id], 0, 1)))
                   for trailer_id, r in movies_set]

    return sort_desc(predictions)
Code Example #8
def get_content_based_predictions(user_baseline, movies, all_movies, sim_matrix, _ratings_by_movie, _global_average):
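    # Content-based prediction: score each candidate movie with
    # predict_user_rating, using its content similarities to every movie in
    # all_movies (the candidate itself included).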

    # predictions = [(movie[0], predict_user_rating(user_baseline, movie[0],
    #                                               [(movieJ[1], sim_matrix[movieJ[0]][movie[0]])
    #                                                for movieJ in all_movies if movieJ[0] != movie[0]],
    #                                               _ratings_by_movie, _global_average))
    #                for movie in movies]
    predictions = [(movie[0], predict_user_rating(user_baseline, movie[0],
                                                  [(movieJ[1], sim_matrix[movieJ[0]][movie[0]])
                                                   for movieJ in all_movies],
                                                  _ratings_by_movie, _global_average))
                   for movie in movies]

    # print predictions
    return sort_desc(predictions)
Code Example #9
def get_item_collaborative_predictions_precomputed_similarities(
        movies_to_predict, _all_ratings, _target_user_id,
        _item_item_sim_matrix):
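    # Item-based collaborative filtering with precomputed item-item
    # similarities: walk the candidate's similarity list, keep up to 20
    # similar items the target user has rated, and predict
    # p_ui = sum(sim * r) / sum(|sim|).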
    predictions = []
    _limit_top_neighbours_to = 20
    # target_user_ratings = _all_ratings[_all_ratings['userID'] == _target_user_id]

    for trailer_id, rating in movies_to_predict:

        # print "Trailer id is", trailer_id
        try:
            _all_sim_items = _item_item_sim_matrix[trailer_id]

            # print "All sims are", _all_sim_items
            # break
            # _allowed_sim_items = _all_sim_items[:_limit_top_neighbours_to]
            allowed_sim_items = []

            for item in _all_sim_items:

                rating = _all_ratings[
                    (_all_ratings['userID'] == _target_user_id)
                    & (_all_ratings['id'] == item[0])]['rating']
                try:  # the current user rated this item
                    rating = float(rating)
                    allowed_sim_items.append((item[1], rating))
                except TypeError:
                    continue

                if len(allowed_sim_items) == _limit_top_neighbours_to:
                    break

            # print "Allowed:", allowed_sim_items
            # b_ui = get_item_baseline(user_baseline, trailer_id, _ratings_by_movie, _global_average)

            try:
                p_ui = (
                    sum([sim * rating for sim, rating in allowed_sim_items]) /
                    sum([abs(sim) for sim, rating in allowed_sim_items]))
            except ZeroDivisionError:
                p_ui = 0
        except KeyError:
            p_ui = 0

        predictions.append((trailer_id, p_ui))

    return sort_desc(predictions)
Code Example #10
            continue

        intersect = pd.merge(_all_ratings[_all_ratings['id'] == movie],
                             _all_ratings[_all_ratings['id'] == neighbor],
                             on='userID')
        # print intersect
        # exit()

        # if len(intersect) > 4:
        if not intersect.empty:

            try:
                # sim = cosine_similarity(intersect['rating_x'].reshape(1, -1), intersect['rating_y'].reshape(1, -1))
                # sim = cosine_similarity([intersect['rating_x']], [intersect['rating_y']])

                sim = adjusted_cosine(intersect, user_profiles)
                movie_similarity[movie].append((neighbor, sim))

                # sim = cosine_similarity(intersect['rating_x'].reshape(1, -1), intersect['rating_y'].reshape(1, -1))
                # movie_similarity[movie].append((neighbor, sim[0][0]))
            except ValueError:
                continue
        else:
            movie_similarity[movie].append((neighbor, 0))

        movie_similarity[movie] = sort_desc(movie_similarity[movie])
    # print movie_similarity[movie]
    # break

save_obj(movie_similarity, 'item_item_collaborative_similarities')
item_similarities = dict()
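# Precompute item-item cosine similarities: compare each movie with the movies
# after it in _all_movies, using the ratings of users who rated both, and keep
# the 30 most similar neighbours per movie.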

for key, target_movie_movielens_id in _all_movies.iterrows():

    other_items = _all_movies.iloc[key + 1:]
    similarities = []

    for sub_key, neighbour_movie_id in other_items.iterrows():

        join_ratings = pd.merge(_all_ratings[_all_ratings['movielensID'] == neighbour_movie_id.iloc[0]],
                                _all_ratings[_all_ratings['movielensID'] == target_movie_movielens_id.iloc[0]], on='userID')
        sim = 0

        if len(join_ratings) > 0:
            ratings_x = np.array(join_ratings['rating_x'])
            ratings_y = np.array(join_ratings['rating_y'])
            sim = cosine_similarity(ratings_x.reshape(1, -1), ratings_y.reshape(1, -1))[0][0]
        similarities.append((neighbour_movie_id, sim))

    ordered = sort_desc(similarities)[:30]
    item_similarities[target_movie_movielens_id.iloc[0]] = ordered

    count += 1
    if count % 100 == 0:
        print count, "movies read"
    break
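    # NOTE: the break above stops the loop after the first movie; remove it to
    # precompute similarities for all movies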

# print item_similarities
print "finished in", time.time() - start, "seconds"
save_obj(item_similarities, 'item_collaborative_similarity')
Code Example #12
from hausdorff import hausdorff
from utils.utils import sort_desc
import numpy as np

_users_bof = load_features('content/3112_users_bof.pkl')
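# Precompute user-user similarities from the per-user feature sets in
# _users_bof, using the Hausdorff distance between each pair of users.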

# test_user_1 = np.array(_users_bof[1])
# test_user_3 = np.array(_users_bof[7])
# print test_user_1
# print hausdorff(test_user_1, test_user_3)

users_bof_similarities = {}

for key, user_bof in _users_bof.iteritems():
    users_bof_similarities[key] = []
    print "current user", key

    for neighbor, neighbor_bof in _users_bof.iteritems():
        if neighbor == key:
            continue

        sim = hausdorff(np.array(user_bof), np.array(neighbor_bof))
        users_bof_similarities[key].append((neighbor, sim))

    users_bof_similarities[key] = sort_desc(users_bof_similarities[key],
                                            desc=False)
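    # hausdorff returns a distance (lower = more similar), so desc=False sorts
    # the closest neighbours first.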
    # print users_bof_similarities[key]
    # break

save_obj(users_bof_similarities, '3112_user_user_bof_similarities')
            neighbor_average = user_profiles.loc[neighbor]['avg']
        except IndexError as e:
            print e, "neighbor", neighbor, "failed"

        try:
            intersect = pd.merge(
                _all_ratings[_all_ratings['userID'] == neighbor],
                target_user_ratings,
                on='id')

            if len(intersect) < 5:
                sim = 0
            else:
                sim = pearsonr(intersect['rating_x'], intersect['rating_y'])[0]
                # ssim = sum([(item['rating_x'] - neighbor_average) * (item['rating_y'] - target_user_average)
                #             for k, item in intersect.iterrows()]) / (
                #     math.sqrt(sum([(item['rating_x'] - neighbor_average) ** 2 for k, item in intersect.iterrows()])) *
                #     math.sqrt(sum([(item['rating_y'] - target_user_average) ** 2 for k, item in intersect.iterrows()])))
        except ValueError:
            sim = 0

        if not (sim > 0 or sim < 0):
            sim = 0
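        # pearsonr can return NaN (e.g. when one ratings vector is constant);
        # NaN fails both comparisons above, so it is replaced with 0.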

        user_user_similarities[user].append((neighbor, sim))

    user_user_similarities[user] = sort_desc(user_user_similarities[user])
    # break

save_obj(user_user_similarities, 'user_user_collaborative_similarities')