Ejemplo n.º 1
0
def load_svd(k=100):
    """Load the pickled feature matrix and return its truncated SVD.

    The matrix is read with ``load_features`` from
    ``content/full_matrix_for_svd.pkl``, decomposed with ``np.linalg.svd``
    and cut down to its leading ``k`` components.

    Args:
        k: Number of leading singular components to keep. Defaults to 100,
            the value that was previously hard-coded. If ``k`` is larger
            than the number of available components, slicing simply returns
            all of them.

    Returns:
        A tuple ``(reduced_u, reduced_s, reduced_v)``: the first ``k``
        columns of the left factor, the first ``k`` singular values, and the
        first ``k`` rows of the right factor exactly as ``np.linalg.svd``
        returns it.
    """
    matrix = load_features('content/full_matrix_for_svd.pkl')
    # DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
    # .values yields the same ndarray on both old and new pandas versions.
    np_matrix = matrix.values

    u, s, v = np.linalg.svd(np_matrix)

    # Shape notes (3112 / 3473) come from the original author's comments.
    reduced_u = u[:, :k]  # 3112 x k
    reduced_s = s[:k]  # k singular values
    reduced_v = v[:k, :]  # k x 3473

    return reduced_u, reduced_s, reduced_v
Ejemplo n.º 2
0
# exit()

# Open the project's SQLite database.
# NOTE(review): hard-coded absolute path; the connection is not closed in the
# visible part of this script.
conn = sqlite3.connect(
    '/home/ralph/Dev/content-based-recsys/content/database.db')
# Distinct trailer ids (best_file = 1) that have at least one MovieLens
# rating, ordered by trailer id. Despite its name, this frame holds only the
# `id` column, not ratings.
_all_ratings = pd.read_sql(
    'select distinct t.id '
    'from movielens_rating r '
    'join movielens_movie m on m.movielensid = r.movielensid '
    'join trailers t on t.imdbid = m.imdbidtt '
    'where t.best_file = 1 '
    # 'and userid < 5000 '
    'order by t.id',
    conn)
# index_to_trailer_id = {}

# Pickled TF-IDF data for the movie synopses.
# NOTE(review): the loop below assumes entry i of this array belongs to the
# i-th trailer id returned by the query above -- TODO confirm.
tfidf_array = load_features('movies_tfidf_synopsis_array.pkl')

# print _all_ratings.iloc[1]
# exit()

# Not used in the visible part of the script; presumably debugging guards
# for code that follows.
count = 0
_safe_exit = 2

# Maps trailer id -> dict; each entry starts out empty here.
trailer_tfidf_similarities = dict()

for i in range(0, len(tfidf_array)):
    # print sum(tfidf_array[i])
    # Row i of the query result; its first (and only) column is the trailer id.
    trailer_id = _all_ratings.iloc[i]
    print trailer_id
    trailer_tfidf_similarities[trailer_id[0]] = {}
    # trailer_tfidf_similarities[trailer_id[0]] = []
Ejemplo n.º 3
0
# coding=utf-8
from utils.opening_feat import load_features, save_obj
import numpy as np
from matplotlib import pyplot as plt

# Disabled one-off step that merged the low-level results into the main
# results file:
# results = load_features('results_3112_users.pkl')
# results_low_level = load_features('results_3112_users_low_level_features.pkl')
# for i in range(2, 16):
#     results[i]['low-level'] = results_low_level[i]['low-level']
# print results
# save_obj(results, 'full_results_3112_users')
# exit()

results = load_features('../results_3112_users.pkl')

# collaborative, DeepRecVis (deep), user-centroid, user-centroid-relevant-movies, mixing-weighted-hybrid, weighted-weighted-hybrid
listing = []

# Earlier variants tracked more strategies (user_collaborative,
# item_collaborative, weighted_hybrid, weighted_hybrid_collaborative,
# weighted_hybrid_item_content, switching_hybrid) with fewer metrics.
#
# One independent accumulator per strategy: each name receives its own dict
# with its own empty lists, so appending to one never affects another.
deep, low_level, tfidf, synopsis = (
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}
    for _strategy in range(4)
)
from utils.opening_feat import load_features
import operator

_trailers_tfidf_sims_matrix = load_features(
    '/home/ralph/Dev/content-based-recsys/content/trailer_tfidf_similarities.pkl'
)
print _trailers_tfidf_sims_matrix[4484]
print type(_trailers_tfidf_sims_matrix[4484])

# print _trailers_tfidf_sims_matrix[4484]
sorted_x = sorted(_trailers_tfidf_sims_matrix[4484].items(),
                  key=operator.itemgetter(1),
                  reverse=True)
print sorted_x
Ejemplo n.º 5
0
import time
import recommender
import evaluation
from utils.opening_feat import load_features, save_obj

# Wall-clock reference for the timing message printed further down.
start = time.time()

# 85040 is the full set size (4252 is 20 iterations)
# users = select_random_users(conn, 100 * batch, 100)

# Pickled item-item collaborative similarities.
# NOTE(review): the disabled probes below suggest each entry is a list of
# (trailer_id, score) tuples -- TODO confirm.
_item_item_collaborative_matrix = load_features(
    'content/item_item_collaborative_similarities.pkl')

# print _item_item_collaborative_matrix[4484]
# x = [k for k, v in _item_item_collaborative_matrix[4484] if v == (4485, 23.988368963108908)]
# print x
# print _item_item_collaborative_matrix[4484].index((4485, 23.988368963108908))
# print _item_item_collaborative_matrix[4484].index((4486, -40.004855289600997))
# exit()

print "loading user profiles..."
user_profiles = load_features('content/user_profiles_dataframe_3112_users.pkl')
# user_profiles = load_features('content/user_profiles_dataframe_all_users.pkl')
# NOTE: the elapsed time is measured from `start`, so it also includes the
# item-item similarity load above, not just the user profiles.
print "user profiles loaded in", time.time() - start, "seconds."

# user_profiles = load_features('content/user_profiles_dataframe_with_user_centroid.pkl')
# user_profiles = user_profiles[:20]
# print "AVG", user_profiles.iloc[7]['avg'], "."
# DEEP_FEATURES_BOF = extract_features('content/bof_128.bin')

# Map every similarity between each movie
Ejemplo n.º 6
0
import sqlite3
import pandas as pd
import math
from sklearn.metrics.pairwise import cosine_similarity
from utils.utils import sort_desc
from utils.opening_feat import save_obj, load_features

# df = load_features('/home/ralph/Dev/content-based-recsys/item_item_collaborative_similarities.pkl')
# print df
# exit()

# Pickled user profiles for all users.
# NOTE(review): the file name and the disabled probes below suggest a pandas
# DataFrame indexed by user id with an 'avg' column -- TODO confirm.
user_profiles = load_features(
    '../content/user_profiles_dataframe_all_users.pkl')
# print user_profiles.index.values
# print user_profiles.loc[3858]['avg']
# exit()

# NOTE(review): hard-coded absolute path; the connection is not closed in the
# visible part of this script.
conn = sqlite3.connect(
    '/home/ralph/Dev/content-based-recsys/content/database.db')
# One row per (userID, trailer id, rating) for trailers flagged best_file = 1
# and users with userid < 5000, ordered by trailer id.
_all_ratings = pd.read_sql(
    'select userID, t.id, rating from movielens_rating r '
    'join movielens_movie m on m.movielensid = r.movielensid '
    'join trailers t on t.imdbid = m.imdbidtt '
    'where t.best_file = 1 '
    'and userid < 5000 '
    'order by t.id', conn)

# Distinct trailer ids that appear in the ratings.
movies = _all_ratings['id'].unique()

# Starts empty; presumably filled by code that follows this excerpt.
movie_similarity = {}
Ejemplo n.º 7
0
# import pandas as pd
from utils.opening_feat import load_features

# Pickled user profiles together with their predictions.
user_profiles_with_predictions = load_features(
    'content/profiles_with_predictions.pkl')
# df = pd.DataFrame.from_dict(user_profiles_with_predictions)

# Dump every (index, profile) pair for manual inspection.
# NOTE: the print statements below are Python 2 syntax.
for index, profile in user_profiles_with_predictions.iteritems():
    print "index", index
    print profile
Ejemplo n.º 8
0
from utils.opening_feat import load_features, save_obj
from hausdorff import hausdorff
from utils.utils import sort_desc
import numpy as np

_users_bof = load_features('content/3112_users_bof.pkl')

# test_user_1 = np.array(_users_bof[1])
# test_user_3 = np.array(_users_bof[7])
# print test_user_1
# print hausdorff(test_user_1, test_user_3)

users_bof_similarities = {}

for key, user_bof in _users_bof.iteritems():
    users_bof_similarities[key] = []
    print "current user", key

    for neighbor, neighbor_bof in _users_bof.iteritems():
        if neighbor == key:
            continue

        sim = hausdorff(np.array(user_bof), np.array(neighbor_bof))
        users_bof_similarities[key].append((neighbor, sim))

    users_bof_similarities[key] = sort_desc(users_bof_similarities[key],
                                            desc=False)
    # print users_bof_similarities[key]
    # break

save_obj(users_bof_similarities, '3112_user_user_bof_similarities')
import sqlite3
import time
import pandas as pd
import numpy as np
from utils.opening_feat import load_features, save_obj

matrix = load_features(
    '/home/ralph/Dev/content-based-recsys/content/full_matrix_for_svd.pkl')
print type(matrix.as_matrix())
exit()

# NOTE(review): this section follows an unconditional exit() and is therefore
# unreachable as the script currently stands.
#
# Distinct trailer ids rated by users with userid < 5000, ordered by id.
_movies_sql = 'select DISTINCT t.id from trailers t ' \
              'join movielens_movie m on t.imdbid = m.imdbidtt ' \
              'join movielens_rating r on m.movielensid = r.movielensid ' \
              'where userid < 5000 ' \
              'order by t.id'

# Wall-clock reference; not read again in the visible part of the script.
start = time.time()
# NOTE(review): hard-coded absolute path; the connection is not closed in the
# visible part of this script.
conn = sqlite3.connect(
    '/home/ralph/Dev/content-based-recsys/content/database.db')
# One row per (userID, trailer id, rating) for users with userid < 5000,
# ordered by user and then by trailer id.
_3112_user_ratings = pd.read_sql(
    'select userID, t.id, rating from movielens_rating r '
    'join movielens_movie m on m.movielensid = r.movielensid '
    'join trailers t on t.imdbid = m.imdbidtt '
    'where userid < 5000 '
    'order by userid, t.id', conn)

# Run the trailer-id query on a plain cursor; `_movies` holds the value
# returned by execute().
c = conn.cursor()
_movies = c.execute(_movies_sql)

import sqlite3
import pandas as pd
import numpy as np
import sys
import math
from sklearn.metrics.pairwise import cosine_similarity
from utils.utils import sort_desc
from scipy.stats import pearsonr
from utils.opening_feat import load_features, save_obj

# Pickled user profiles for all users.
user_profiles = load_features(
    '/home/ralph/Dev/content-based-recsys/content/user_profiles_dataframe_all_users.pkl'
)
# print user_profiles.loc[3113]
# exit()
# print user_profiles.columns
# exit()

# One row per (userID, trailer id, rating) for every user, ordered by user.
# The connection is released in `finally`, so it is closed even when the
# query raises (previously it leaked on failure).
conn = sqlite3.connect('content/database.db')
try:
    _all_ratings = pd.read_sql(
        'select userID, t.id, rating from movielens_rating r '
        'join movielens_movie m on m.movielensid = r.movielensid '
        'join trailers t on t.imdbid = m.imdbidtt '
        # 'where userid < 5000 '
        'order by userid',
        conn)
finally:
    conn.close()

# Distinct user ids and trailer ids present in the ratings.
users = _all_ratings['userID'].unique()
movies = _all_ratings['id'].unique()
Ejemplo n.º 11
0
from utils.opening_feat import load_features, save_obj
import numpy as np
from matplotlib import pyplot as plt

# Disabled one-off step that merged the low-level results into the main
# results file:
# results = load_features('results_3112_users.pkl')
# results_low_level = load_features('results_3112_users_low_level_features.pkl')

# for i in range(2, 16):
#     results[i]['low-level'] = results_low_level[i]['low-level']

# print results
# save_obj(results, 'full_results_3112_users')
# exit()

# Pickled evaluation results for the 500-user run.
results = load_features('results_500_users.pkl')

# collaborative, DeepRecVis (deep), user-centroid, user-centroid-relevant-movies, mixing-weighted-hybrid, weighted-weighted-hybrid
# Starts empty; not filled in the visible part of the script.
listing = []
# user_collaborative, item_collaborative, deep, weighted-hybrid, low_level = \
#     {'precision': [], 'recall': [], 'diversity': []}, {'precision': [], 'recall': [], 'diversity': []}, \
#     {'precision': [], 'recall': [], 'diversity': []}, {'precision': [], 'recall': [], 'diversity': []},\
#     {'precision': [], 'recall': [], 'diversity': []}
# user_collaborative, item_collaborative, deep, weighted_hybrid, low_level, weighted_hybrid_collaborative, \
# weighted_hybrid_item_content, switching_hybrid, tfidf, synopsis = \
user_collaborative, item_collaborative, deep, weighted_hybrid, low_level, switching_hybrid, tfidf, synopsis = \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}, \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}, \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}, \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}, \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}, \
    {'precision': [], 'recall': [], 'diversity': [], 'rankscore': [], 'f1': []}, \