def _standardize(features):
    # Shared helper: z-score-standardise every feature vector in `features`.
    # Relies on the CPython 2 guarantee that dict.keys() and dict.iteritems()
    # enumerate entries in the same order, so keys stay aligned with the
    # scaled rows produced below.
    arr = np.array([x[1] for x in features.iteritems()])
    scaler = preprocessing.StandardScaler().fit(arr)
    std = scaler.transform(arr)
    return dict(it.izip(features.keys(), std))


def extract_features(deep_feautures='resnet_152_lstm_128.dct'):
    """Load, standardise and combine the low-level and deep feature sets.

    Args:
        deep_feautures: filename of the pickled deep-feature dictionary.
            (Existing parameter name kept -- typo included -- so keyword
            callers keep working.)

    Returns:
        Tuple ``(LOW_LEVEL_FEATURES, DEEP_FEATURES, HYBRID_FEATURES)`` of
        dicts keyed by movie id.  Hybrid vectors are the deep vector with
        the low-level vector appended, and only exist for ids present in
        BOTH input sets.
    """
    low_level = _standardize(load_features('low_level_dict.bin'))
    deep = _standardize(load_features(deep_feautures))

    # Concatenate deep + low-level per id; silently skip ids that are
    # missing from the low-level set (original behaviour).
    hybrid = {}
    for k in deep.iterkeys():
        try:
            hybrid[k] = np.append(deep[k], low_level[k])
        except KeyError:
            continue

    return low_level, deep, hybrid
def get_similarity_matrices():
    """Load the three precomputed cosine-similarity matrices.

    Returns:
        ``[low_level, deep, hybrid]`` -- one similarity structure per
        feature set, in that fixed order.
    """
    matrix_files = [
        'movie_cosine_similarities_low_level.bin',
        'movie_cosine_similarities_deep.bin',
        'movie_cosine_similarities_hybrid.bin',
    ]
    return [load_features(name) for name in matrix_files]
def extract_features(deep_feautures='resnet_152_lstm_128.dct'):
    """Load the deep-feature dictionary and z-score standardise its vectors.

    Args:
        deep_feautures: filename of the pickled deep-feature dictionary.

    Returns:
        Dict mapping each original key to its standardised feature vector.
    """
    raw = load_features(deep_feautures)
    # dict.keys() and dict.iteritems() share iteration order in CPython 2,
    # so the scaled rows stay aligned with their keys.
    matrix = np.array([pair[1] for pair in raw.iteritems()])
    standardised = preprocessing.StandardScaler().fit(matrix).transform(matrix)
    return dict(it.izip(raw.keys(), standardised))
"""Exploratory script setup: load TF-IDF profiles and pick one movie to inspect.

Module-level side effects: unpickles two feature dictionaries and opens a
SQLite connection as soon as this module is imported.
"""
from scipy.spatial.distance import cosine
from opening_feat import load_features
import sqlite3
import scipy
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from scipy.stats import pearsonr
from scipy.spatial.distance import minkowski
from sklearn import preprocessing
import itertools as it
import opening_feat as of

# TF-IDF profiles -- presumably keyed by user id / MovieLens id; verify
# against how opening_feat builds these pickles.
user_features = of.load_features('users_tfidf_profile.bin')
movie_features = of.load_features('movies_tfidf_profile.bin')

conn = sqlite3.connect('database.db')

# Hard-coded movie under inspection; commented alternatives kept from
# earlier experiment runs.
# trailerid = 2216 # Jurassic World
movielensid = 4993  # the lord of the rings
# movielensid = 2571 # Matrix
# trailerid = 5632 # ToyStory
# trailerid = 2
# trailerid = 3341
# movie = low_level_features[trailerid]
# movie = features[trailerid]
# movie = features[3412] # Se7en
# movie = features[5612] #Django Unchained
# movie = low_level_features[4484] # Matrix
movie = movie_features[movielensid]
for c in columns: columnList.append(c) return columnList conn = sqlite3.connect('database.db') SEPARATOR = "," PREDICTION_LIST_SIZE = 20 LIMIT_ITEMS_TO_PREDICT = 10 NUM_USERS = 1 MIN_FEATURE_VALUE = -1 MAX_FEATURE_VALUE = 1 FEATURES = load_features('res_neurons_places_gru_32_feat_1024_scenes_350.bin') #dictionary COLUMNS = [ "CAST(imdbrating AS REAL)", "CAST(tomatorating AS REAL)", #"movielensrating", # "CAST(imdbvotes AS NUMERIC)", # "CAST(year AS NUMERIC)", "CAST(metascore AS REAL)", "CAST(tomatouserrating AS REAL)" ] COLUMNS_NOMINAL = [ "genre", "actors", "director",
"""Load the low-level feature dictionary and L2-normalise its vectors."""
from opening_feat import load_features
import numpy as np
from sklearn import preprocessing

low_level_features = load_features('low_level_dict.bin')

# normalize
# Stack every feature vector into one matrix (Python-2 iteritems).
arr = np.array([x[1] for x in low_level_features.iteritems()])
# NOTE(review): preprocessing.normalize() returns the whole row-normalised
# matrix; the trailing [0] keeps only the FIRST row.  Confirm whether the
# full matrix was intended here.
normalized_ll_features = preprocessing.normalize(arr)[0]
# Timing checkpoint: seconds elapsed since `start` (defined earlier in the file).
mid = time()
print mid - start, "seconds"

# Build the full pairwise cosine-similarity structure over `all_movies`
# (O(n^2) pairs by design).  `similarities` maps
# movie id -> {other movie id: cosine similarity}.
count = 0
for movie in all_movies:
    subsims = {}
    for movie_j in all_movies:
        count += 1
        try:
            cos = float(recommender.cosine(movie, movie_j, HYBRID_FEATURES_BOF))
            # movie_j[0] is presumably the movie id column of a DB row -- confirm.
            subsims[movie_j[0]] = cos
        except KeyError:
            # Movies without hybrid features: report the pair and move on.
            print movie, movie_j, "error"
            continue
    similarities[movie[0]] = subsims

# Spot-check one entry before persisting.
print similarities[2]

mid = time()
print mid - start, "seconds"

# Persist with pickle protocol 2 (highest protocol available in Python 2).
with open('movie_cosine_similarities_hybrid.bin', 'wb') as fp:
    pickle.dump(similarities, fp, protocol=2)

# Round-trip sanity check: reload the file and print the same entry.
similarities = of.load_features('movie_cosine_similarities_hybrid.bin')
print similarities[2]
def get_similarity_matrices():
    """Load and return the precomputed deep-feature cosine-similarity matrix."""
    matrix_file = 'movie_cosine_similarities_deep.bin'
    return load_features(matrix_file)
def extract_tfidf_features():
    """Load the pickled TF-IDF profiles.

    Returns:
        ``(user_features, movie_features)`` -- the user and movie profile
        dictionaries, in that order.
    """
    profile_files = ('users_tfidf_profile.bin', 'movies_tfidf_profile.bin')
    user_features, movie_features = [load_features(f) for f in profile_files]
    return user_features, movie_features
# import constants from utils import isValid # import scipy # from scipy.spatial.distance import cosine from math import sqrt from sklearn.metrics.pairwise import cosine_similarity from scipy.stats import pearsonr import numpy as np from opening_feat import load_features DEEP_FEATURES = load_features('resnet_152_lstm_128.dct') #dictionary def computePearson(i, j): a = np.array(i) b = np.array(j) if (a.size == 0 and b.size == 0): return 0 else: pearson = pearsonr(a, b) # print pearson return pearson[0] def computeCosine(i, j): a = np.array(i) b = np.array(j) if (a.size == 0 and b.size == 0): return 0 else: a = a.reshape(1, -1)
import scipy
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from scipy.stats import pearsonr
from scipy.spatial.distance import minkowski
from sklearn import preprocessing
import itertools as it

# NOTE(review): sqlite3 is used here but not imported in this chunk --
# presumably imported earlier in the file; verify.
conn = sqlite3.connect('database.db')

# Alternative feature files tried in earlier experiments.
# features = load_features('res_neurons_32_feat_1024_scenes_350.bin') #dictionary
# features = load_features('res_neurons_places_gru_32_feat_1024_scenes_350.bin')
# features = load_features('resnet_152_lstm_128.dct')
# features = load_features('bof_128.bin')
# low_level_features = load_features('low_level_dict.bin')

# normalize
# Z-score-standardise the deep (bag-of-features) vectors.  dict.keys() and
# dict.iteritems() share iteration order in CPython 2, keeping the keys
# aligned with the scaled rows.
DEEP_FEATURES = load_features('bof_128.bin')
arr = np.array([x[1] for x in DEEP_FEATURES.iteritems()])
scaler = preprocessing.StandardScaler().fit(arr)
std = scaler.transform(arr)
DEEP_FEATURES = {k: v for k, v in it.izip(DEEP_FEATURES.keys(), std)}

# Same standardisation applied to the low-level features.
LOW_LEVEL_FEATURES = load_features('low_level_dict.bin')
arr = np.array([x[1] for x in LOW_LEVEL_FEATURES.iteritems()])
scaler = preprocessing.StandardScaler().fit(arr)
std = scaler.transform(arr)
LOW_LEVEL_FEATURES = {k: v for k, v in it.izip(LOW_LEVEL_FEATURES.keys(), std)}

# Combine the deep and low-level vectors per movie id.
HYBRID_FEATURES = {}
for k in DEEP_FEATURES.iterkeys():
    try: