Example 1
def extract_features(deep_features='resnet_152_lstm_128.dct'):

    # Standardize the hand-crafted (low-level) features column-wise.
    LOW_LEVEL_FEATURES = load_features('low_level_dict.bin')
    arr = np.array(list(LOW_LEVEL_FEATURES.values()))
    std = preprocessing.StandardScaler().fit_transform(arr)
    # dict views iterate in insertion order (Python 3.7+), so keys stay aligned with rows.
    LOW_LEVEL_FEATURES = dict(zip(LOW_LEVEL_FEATURES.keys(), std))

    # Standardize the deep features the same way.
    DEEP_FEATURES = load_features(deep_features)
    # DEEP_FEATURES = load_features('bof_128.bin')
    arr = np.array(list(DEEP_FEATURES.values()))
    std = preprocessing.StandardScaler().fit_transform(arr)
    DEEP_FEATURES = dict(zip(DEEP_FEATURES.keys(), std))

    # Concatenate deep and low-level vectors for every key present in both dicts.
    HYBRID_FEATURES = {}
    for k in DEEP_FEATURES:
        try:
            HYBRID_FEATURES[k] = np.append(DEEP_FEATURES[k],
                                           LOW_LEVEL_FEATURES[k])
        except KeyError:
            continue

    # arr = np.array(list(HYBRID_FEATURES.values()))
    # std = preprocessing.StandardScaler().fit_transform(arr)
    # HYBRID_FEATURES = dict(zip(HYBRID_FEATURES.keys(), std))

    return LOW_LEVEL_FEATURES, DEEP_FEATURES, HYBRID_FEATURES
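
A minimal usage sketch (not part of the original file) showing that the standardize-and-rezip pattern above keeps dict keys aligned with scaled rows; the toy keys and values are invented:

import numpy as np
from sklearn import preprocessing

toy = {1: np.array([1.0, 10.0]), 2: np.array([3.0, 30.0]), 3: np.array([5.0, 50.0])}
arr = np.array(list(toy.values()))            # rows follow dict insertion order
std = preprocessing.StandardScaler().fit_transform(arr)
toy_std = dict(zip(toy.keys(), std))          # keys() iterates in the same order as values()
print(toy_std[1])                             # each column now has mean 0 and unit variance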
Example 2
def get_similarity_matrices():

    # Load the three precomputed cosine-similarity matrices from disk.
    similarities_low_level = load_features(
        'movie_cosine_similarities_low_level.bin')
    similarities_deep = load_features('movie_cosine_similarities_deep.bin')
    similarities_hybrid = load_features('movie_cosine_similarities_hybrid.bin')

    return [similarities_low_level, similarities_deep, similarities_hybrid]
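
A hedged usage line (movie ids invented): each matrix is a dict of dicts keyed by movie id, as Example 7 builds them, so a single score reads as:

low, deep, hybrid = get_similarity_matrices()
print(low[2][7])  # hypothetical pair of movie ids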
Example 3
def extract_features(deep_features='resnet_152_lstm_128.dct'):

    # Standardize the deep features column-wise before returning them.
    _deep_features = load_features(deep_features)
    arr = np.array(list(_deep_features.values()))
    std = preprocessing.StandardScaler().fit_transform(arr)
    _deep_features = dict(zip(_deep_features.keys(), std))

    return _deep_features
Example 4
from scipy.spatial.distance import cosine
from opening_feat import load_features
import sqlite3
import scipy
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from scipy.stats import pearsonr
from scipy.spatial.distance import minkowski
from sklearn import preprocessing
import itertools as it
import opening_feat as of

user_features = of.load_features('users_tfidf_profile.bin')
movie_features = of.load_features('movies_tfidf_profile.bin')

conn = sqlite3.connect('database.db')

# trailerid = 2216  # Jurassic World
movielensid = 4993  # The Lord of the Rings
# movielensid = 2571  # The Matrix
# trailerid = 5632  # Toy Story
# trailerid = 2
# trailerid = 3341
# movie = low_level_features[trailerid]
# movie = features[trailerid]

# movie = features[3412]  # Se7en
# movie = features[5612]  # Django Unchained
# movie = low_level_features[4484]  # The Matrix
movie = movie_features[movielensid]
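
A hedged follow-up sketch (not in the original script): scoring the selected movie profile against one user profile with the already-imported cosine_similarity; the user id 1 and the assumption that both profiles are 1-D numeric vectors are illustrative only.

user_vector = np.asarray(user_features[1]).reshape(1, -1)   # hypothetical user id
movie_vector = np.asarray(movie).reshape(1, -1)
print(cosine_similarity(user_vector, movie_vector)[0][0])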
Example 5
def get_column_list(columns):
    # Hypothetical header: the original excerpt begins inside this function.
    columnList = []
    for c in columns:
        columnList.append(c)
    return columnList

conn = sqlite3.connect('database.db')

SEPARATOR = ","

PREDICTION_LIST_SIZE = 20
LIMIT_ITEMS_TO_PREDICT = 10
NUM_USERS = 1

MIN_FEATURE_VALUE = -1
MAX_FEATURE_VALUE = 1

FEATURES = load_features('res_neurons_places_gru_32_feat_1024_scenes_350.bin')  # dictionary of precomputed features

COLUMNS = [
    "CAST(imdbrating AS REAL)",
    "CAST(tomatorating AS REAL)",
    # "movielensrating",
    # "CAST(imdbvotes AS NUMERIC)",
    # "CAST(year AS NUMERIC)",
    "CAST(metascore AS REAL)",
    "CAST(tomatouserrating AS REAL)",
]

COLUMNS_NOMINAL = [
    "genre",
    "actors",
    "director",
Example 6
from opening_feat import load_features
import numpy as np
from sklearn import preprocessing

low_level_features = load_features('low_level_dict.bin')
arr = np.array(list(low_level_features.values()))
# normalize() rescales each row to unit L2 norm; [0] keeps only the first vector.
normalized_ll_features = preprocessing.normalize(arr)[0]
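
For contrast, a small made-up example (not from the source) of how preprocessing.normalize differs from the StandardScaler used in the other snippets: normalize rescales each row to unit L2 norm, while StandardScaler standardizes each column.

demo = np.array([[3.0, 4.0], [6.0, 8.0]])
print(preprocessing.normalize(demo))                        # both rows become [0.6, 0.8]
print(preprocessing.StandardScaler().fit_transform(demo))   # each column -> mean 0, unit variance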
Example 7
# 'start', 'all_movies', 'recommender' and HYBRID_FEATURES_BOF are defined
# earlier in the original script; this excerpt starts mid-run.
mid = time()

print(mid - start, "seconds")

count = 0

# Build the full pairwise cosine-similarity matrix over all movies.
for movie in all_movies:
    subsims = {}
    for movie_j in all_movies:
        count += 1
        try:
            cos = float(recommender.cosine(movie, movie_j,
                                           HYBRID_FEATURES_BOF))
            subsims[movie_j[0]] = cos
        except KeyError:
            print(movie, movie_j, "error")
            continue
    similarities[movie[0]] = subsims

print(similarities[2])

mid = time()

print(mid - start, "seconds")

# protocol=2 keeps the pickle readable from Python 2 as well.
with open('movie_cosine_similarities_hybrid.bin', 'wb') as fp:
    pickle.dump(similarities, fp, protocol=2)

# Round-trip check: reload the file and print the same entry.
similarities = of.load_features('movie_cosine_similarities_hybrid.bin')

print(similarities[2])
Example 8
def get_similarity_matrices():

    similarities_deep = load_features('movie_cosine_similarities_deep.bin')

    return similarities_deep
Example 9
def extract_tfidf_features():
    user_features = load_features('users_tfidf_profile.bin')
    movie_features = load_features('movies_tfidf_profile.bin')

    return user_features, movie_features
Example 10
# import constants
from utils import isValid
# import scipy
# from scipy.spatial.distance import cosine
from math import sqrt
from sklearn.metrics.pairwise import cosine_similarity
from scipy.stats import pearsonr
import numpy as np
from opening_feat import load_features

DEEP_FEATURES = load_features('resnet_152_lstm_128.dct')  # dictionary of precomputed features


def computePearson(i, j):
    a = np.array(i)
    b = np.array(j)
    if a.size == 0 and b.size == 0:
        return 0
    # pearsonr returns (coefficient, p-value); keep only the coefficient.
    pearson = pearsonr(a, b)
    return pearson[0]
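
A quick toy check (vectors invented, not from the source) that computePearson returns just the correlation coefficient:

print(computePearson([1, 2, 3], [2, 4, 6]))  # perfectly correlated input, prints 1.0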


def computeCosine(i, j):
    a = np.array(i)
    b = np.array(j)
    if a.size == 0 and b.size == 0:
        return 0
    # cosine_similarity expects 2-D arrays, so reshape both vectors
    # (tail completed; the original excerpt is cut off after the first reshape).
    a = a.reshape(1, -1)
    b = b.reshape(1, -1)
    return cosine_similarity(a, b)[0][0]
Example 11
import sqlite3
import scipy
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from scipy.stats import pearsonr
from scipy.spatial.distance import minkowski
from sklearn import preprocessing
from opening_feat import load_features

conn = sqlite3.connect('database.db')
# features = load_features('res_neurons_32_feat_1024_scenes_350.bin') #dictionary
# features = load_features('res_neurons_places_gru_32_feat_1024_scenes_350.bin')
# features = load_features('resnet_152_lstm_128.dct')
# features = load_features('bof_128.bin')
# low_level_features = load_features('low_level_dict.bin') # normalize

DEEP_FEATURES = load_features('bof_128.bin')
arr = np.array(list(DEEP_FEATURES.values()))
std = preprocessing.StandardScaler().fit_transform(arr)
DEEP_FEATURES = dict(zip(DEEP_FEATURES.keys(), std))

LOW_LEVEL_FEATURES = load_features('low_level_dict.bin')
arr = np.array(list(LOW_LEVEL_FEATURES.values()))
std = preprocessing.StandardScaler().fit_transform(arr)
LOW_LEVEL_FEATURES = dict(zip(LOW_LEVEL_FEATURES.keys(), std))

HYBRID_FEATURES = {}

# Concatenate deep and low-level vectors for every key present in both dicts
# (body completed from the identical loop in Example 1; the excerpt is cut off here).
for k in DEEP_FEATURES:
    try:
        HYBRID_FEATURES[k] = np.append(DEEP_FEATURES[k], LOW_LEVEL_FEATURES[k])
    except KeyError:
        continue