def SVDloadData(dat_file=None):
    """Load a '::'-separated ratings file into a new SVD model and return it.

    Args:
        dat_file: Path of the ratings file to load. When omitted, the
            MovieLens 1M location this function has always used is taken,
            so existing zero-argument callers are unaffected.

    Returns:
        The SVD instance after ``load_data``; no factorization is computed
        here.
    """
    if dat_file is None:
        dat_file = '/home/commons/RecSys/MOVIEDATA/MOVIEDATA/ml-1m/ratings.dat'

    svd = SVD()
    # Module-level switch on the library; presumably enables progress
    # output -- TODO confirm against the recsys documentation.
    recsys.algorithm.VERBOSE = True
    # Field 0 of each line is used as 'col', field 1 as 'row', field 2 as
    # 'value'; ids are converted with int.
    svd.load_data(filename=dat_file, sep='::',
                  format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    # Single-argument print: same output under Python 2 and Python 3.
    print(svd.get_matrix())
    return svd
Esempio n. 2
0
def quickstart():
    """Run the MovieLens SVD walk-through end to end and print the results.

    Loads ``DATA_DIR + 'ml-1m-ratings.dat'``, computes a k=100 SVD, then
    prints: the result of ``similar`` for item 1, the predicted and the
    stored rating for (item 1, user 1), and the result of ``recommend``
    for item 1.

    The function runs straight through; it does not drop into a debugger,
    so it can be used from non-interactive runs.
    """
    svd = SVD()
    recsys.algorithm.VERBOSE = True

    # load movielens data
    dat_file = DATA_DIR + 'ml-1m-ratings.dat'
    svd.load_data(filename=dat_file,
                  sep='::',
                  format={
                      'col': 0,
                      'row': 1,
                      'value': 2,
                      'ids': int
                  })

    # compute svd
    k = 100
    svd.compute(k=k,
                min_values=10,
                pre_normalize=None,
                mean_center=True,
                post_normalize=True)

    # movie id
    ITEMID1 = 1  # toy story

    # get movies similar to toy story
    print(svd.similar(ITEMID1))

    # bounds the predicted rating is mapped onto, and the pair to predict
    MIN_RATING = 0.0
    MAX_RATING = 5.0
    USERID = 1
    ITEMID = 1

    # predicted rating for user 1 and item 1, next to the stored value
    pred = svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING)
    actual = svd.get_matrix().value(ITEMID, USERID)
    print('predicted rating = {0}'.format(pred))
    print('actual rating = {0}'.format(actual))

    print('which users should see Toy Story?:')
    print(svd.recommend(ITEMID))
Esempio n. 3
0
def loadSVD():
    """Build an SVD model from 'favRate.dat' and print offers similar to one id.

    Steps, in order:
      1. load the '::'-separated ratings from 'favRate.dat';
      2. save the loaded data to "svd.dat";
      3. compute a k=20 SVD (``savefile='.'``);
      4. print the data matrix;
      5. fetch the 10 entries most similar to id 897346 and print each as
         ``<id> <title> <similarity>``, with titles looked up in
         'swoffering.yaml'.

    Raises:
        KeyError: if a similar id has no entry in the title mapping.
        IOError/OSError: if the title file cannot be opened.
    """
    ratings_filename = 'favRate.dat'
    svd = SVD()
    svd.load_data(filename=ratings_filename, sep='::',
                  format={'col': 0, 'row': 1, 'value': 2})

    svd.save_data("svd.dat", False)

    K = 20
    svd.compute(k=K, min_values=1, pre_normalize="rows", mean_center=False,
                post_normalize=True, savefile='.')

    sparse_matrix = svd.get_matrix()

    # The return value is not used below. The call is kept because this
    # file does not show whether it has side effects -- TODO confirm, then
    # drop it if it is a pure accessor.
    svd.get_matrix_similarity()

    print(sparse_matrix)

    # Other ids noted by the author for this lookup: 1173893, 1396943
    sim = svd.similar(897346, 10)

    titles_filename = 'swoffering.yaml'
    # Context manager so the handle is closed even if parsing fails.
    with open(titles_filename, 'r') as title_stream:
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary objects and is rejected by newer PyYAML releases. If
        # this file only holds a plain id -> title mapping, switch to
        # yaml.safe_load -- confirm before changing.
        title_list = yaml.load(title_stream)

    for offid, similar in sim:
        # %-formatting reproduces the space-separated output of the former
        # multi-argument print statement on both Python 2 and Python 3.
        print('%s %s %s' % (offid, title_list[str(offid)], similar))
Esempio n. 4
0
def quickstart():
    """Run the MovieLens SVD walk-through and print every result it computes.

    Loads 'ml-1m/ratings.dat', computes a k=100 SVD, then prints the result
    of ``similar`` for item 1, the predicted and stored rating for
    (item 1, user 1), and the result of ``recommend`` for item 1.

    The function runs straight through without stopping in a debugger, and
    the similarity / recommendation results are printed rather than
    computed and thrown away.
    """
    svd = SVD()
    recsys.algorithm.VERBOSE = True

    # load movielens data
    dat_file = 'ml-1m/ratings.dat'
    svd.load_data(filename=dat_file, sep='::',
                  format={'col': 0, 'row': 1, 'value': 2, 'ids': int})

    # compute svd
    k = 100
    svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True,
                post_normalize=True)

    # movie id
    ITEMID1 = 1      # toy story

    # get movies similar to toy story
    print(svd.similar(ITEMID1))

    # bounds passed to predict, and the (item, user) pair to look at
    MIN_RATING = 0.0
    MAX_RATING = 5.0
    USERID = 1
    ITEMID = 1

    # predicted rating next to the value stored in the data matrix
    pred = svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING)
    actual = svd.get_matrix().value(ITEMID, USERID)
    print('predicted rating = {0}'.format(pred))
    print('actual rating = {0}'.format(actual))

    # which users should see Toy Story?
    print(svd.recommend(ITEMID))
Esempio n. 5
0
                  'col': 0,
                  'row': 1,
                  'value': 2,
                  'ids': float
              })

# Factorize with k=30; 'savefile' is given as '/tmp/movielens'.
k = 30
svd.compute(k=k, min_values=10, pre_normalize=None,
            mean_center=True, post_normalize=True,
            savefile='/tmp/movielens')

# Disabled item-to-item check kept for reference:
#   svd.similarity(1, 2355)  # Toy Story (1995) vs. A bug's life (1998)

# Bounds handed to predict(), and the (item, user) pair for the first query.
MIN_RATING = 1.0
MAX_RATING = 5.0

USERID = 1
ITEMID = 1129

# Prediction for the pair above, then for item 1953 / user 1.
print(svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING))
print(svd.predict(1953, 1, MIN_RATING, MAX_RATING))
# Author's recorded output for the second prediction: 5.0

# Stored value for the same item 1953 / user 1 cell.
print(svd.get_matrix().value(1953, 1))
# Author's recorded stored value: 5.0
Esempio n. 6
0
# Item-to-item similarity, first from svd2, then from svd.
print("similaridad entre items  sin usar la matrix que ya se genero ")
print(svd2.similarity(ITEMID1, ITEMID2))

print("similaridad entre items usando la matrix guardada")
print(svd.similarity(ITEMID1, ITEMID2))

print("Recomendaciones para el itemid1")
print(svd.similar(ITEMID1))

# Making the predictions
MIN_RATING = 0
MAX_RATING = 5
ITEMID = 1
USERID = 1
print(svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING))
print(svd.get_matrix().value(ITEMID, USERID))

# Making recommendations for the user and for the item.
# Columns are users and rows are items, hence is_row=False for a user id.
print(svd.recommend(USERID, is_row=False))
print(svd.recommend(ITEMID))
print("se deben mostrar 5 recomendaciones para el item 1")
print(svd.recommend(USERID, n=5, only_unknowns=True, is_row=False))

# Using the matrix that has already been generated
from recsys.utils.svdlibc import SVDLIBC

svdlibc = SVDLIBC('./data/ratings.dat')
svdlibc.to_sparse_matrix(sep='::',
                         format={
Esempio n. 7
0
            pre_normalize=None,
            mean_center=True,
            post_normalize=True)
# predicted_rating = svd.predict(int(5), 'A1', 1, 10)
# predicted_rating2 = svd.predict(int(1), 'A1', 1, 10)

# print('Predicted rating', predicted_rating)
# print('Predicted rating', predicted_rating2)

# Load the records to evaluate; '|' is passed as the second argument
# (presumably the field delimiter -- confirm against ETLUtils).
records = ETLUtils.load_csv_file(file_name_header, '|')
# One absolute prediction error per record that could be scored.
errors = []

for record in records:
    # Best-effort scoring: any KeyError raised inside the try body makes
    # the record be skipped without a message. That covers a missing
    # 'user'/'item' field and, presumably, ids unknown to the model.
    try:
        # print(record['user'], record['item'], record['rating'])
        user = record['user']
        item = int(record['item'])
        # Prediction bounded to the 1..5 range passed here.
        predicted_rating = svd.predict(item, user, 1, 5)
        print(record['user'], record['item'], predicted_rating)
        # predicted_rating = round(predicted_rating)
        # The reference value is read from the model's own data matrix,
        # not from the record.
        actual_rating = svd.get_matrix().value(item, user)
        error = abs(predicted_rating - actual_rating)
        errors.append(error)
    except KeyError:
        continue

# NOTE(review): both metrics receive absolute errors; confirm that
# RootMeanSquareError.compute_list expects that input.
mean_absolute_error = MeanAbsoluteError.compute_list(errors)
root_mean_square_error = RootMeanSquareError.compute_list(errors)
print('Mean Absolute error: %f' % mean_absolute_error)
print('Root mean square error: %f' % root_mean_square_error)
    print(json.dumps(similaries, ensure_ascii=False))

# import pdb;pdb.set_trace()
# NOTE(review): sys.exit(0) ends the program here, so every statement
# below it in this script is unreachable. Remove the exit (or the dead
# code) once the intent is confirmed.
import sys
sys.exit(0)

print(svd.similar(ITEMID1))

# Returns: <ITEMID, Cosine Similarity Value>

# Bounds passed to predict() and the (item, user) pair queried below.
MIN_RATING = 0.0
MAX_RATING = 1.0
ITEMID = 109
# NOTE(review): an integer literal whose digits look like a hex-encoded
# string; verify this matches how user ids are stored in the matrix.
USERID = 3837663637323963363639393565373833613237396534393132376338386362

print('testing..')
print(svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING))
# Predicted value 5.0
# NOTE(review): the "5.0" remarks here do not fit the 0.0..1.0 bounds set
# above; they may be copied from another example -- verify.

print(svd.get_matrix().value(ITEMID, USERID))

# Real value 5.0

# Recommend (non-rated) movies to a user:
print('recommend to user')
print(svd.recommend(USERID, is_row=False)) # cols are users and rows are items, thus we set is_row=False

print(svd.recommend(ITEMID))

# Debugger breakpoint at the end of the script (also unreachable because
# of the sys.exit(0) above).
import pdb;pdb.set_trace()
Esempio n. 9
0
 (595,  0.46031829709743477), # Beauty and the Beast
 (1907, 0.44589398718134365), # Mulan
 (364,  0.42908159895574161), # The Lion King
 (2081, 0.42566581277820803), # The Little Mermaid
 (3396, 0.42474056361935913), # The Muppet Movie
 (2761, 0.40439361857585354)] # The Iron Giant

 MIN_RATING = 0.0
MAX_RATING = 5.0
ITEMID = 1
USERID = 1

svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING)
# Predicted value 5.0

svd.get_matrix().value(ITEMID, USERID)
# Real value 5.0

svd.recommend(USERID, is_row=False) #cols are users and rows are items, thus we set is_row=False

# Returns: <ITEMID, Predicted Rating>
[(2905, 5.2133848204673416), # Shaggy D.A., The
 (318,  5.2052108435956033), # Shawshank Redemption, The
 (2019, 5.1037438278755474), # Seven Samurai (The Magnificent Seven)
 (1178, 5.0962756861447023), # Paths of Glory (1957)
 (904,  5.0771405690055724), # Rear Window (1954)
 (1250, 5.0744156653222436), # Bridge on the River Kwai, The
 (858,  5.0650911066862907), # Godfather, The
 (922,  5.0605327279819408), # Sunset Blvd.
 (1198, 5.0554543765500419), # Raiders of the Lost Ark
 (1148, 5.0548789542105332)] # Wrong Trousers, The

# Compute a k=100 SVD; 'savefile' is given as './data/MERGED6_svd'.
k = 100
svd.compute(k=k,
            min_values=10,
            pre_normalize=None,
            mean_center=True,
            post_normalize=True,
            savefile='./data/MERGED6_svd')

# To load a previously saved SVD model instead of recomputing:
# svd = SVD(filename='./data/MERGED_svd') # Loading already computed SVD model
# NOTE(review): the path in the line above ('MERGED_svd') differs from the
# savefile used by compute() ('MERGED6_svd') -- confirm which is intended.


# Collect the row labels of the data matrix as a NumPy array.
# Per the author's note, rows and columns with fewer than 10 values were
# cut out (min_values=10 above), so these are the ids that have results.
# NOTE(review): this reads the private attribute `_matrix`.
m = svd.get_matrix()
rowlabl = m._matrix.row_labels
ids = np.array(rowlabl)



# ==== The tables can be reduced further using this list of ids.

# Calculate the cosine similarity score between 2 items:
# svd.similarity(ids[0], ids[100])  # cosine similarity

# Planned steps for each movie:
# 1. get the top 50 books
# 2. eliminate duplicates by comparing titles
# 3. save ids and scores for the final 10 books & movies
Esempio n. 11
0
# Compute a k=100 SVD on the already-loaded data.
k = 100
svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True, post_normalize=True)
# predicted_rating = svd.predict(int(5), 'A1', 1, 10)
# predicted_rating2 = svd.predict(int(1), 'A1', 1, 10)

# print('Predicted rating', predicted_rating)
# print('Predicted rating', predicted_rating2)

# Load the records to evaluate; '|' is passed as the second argument
# (presumably the field delimiter -- confirm against ETLUtils).
records = ETLUtils.load_csv_file(file_name_header, '|')
# One absolute prediction error per record that could be scored.
errors = []

for record in records:
    # Best-effort scoring: any KeyError raised inside the try body makes
    # the record be skipped without a message. That covers a missing
    # 'user'/'item' field and, presumably, ids unknown to the model.
    try:
        # print(record['user'], record['item'], record['rating'])
        user = record['user']
        item = int(record['item'])
        # Prediction bounded to the 1..5 range passed here.
        predicted_rating = svd.predict(item, user, 1, 5)
        print(record['user'], record['item'], predicted_rating)
        # predicted_rating = round(predicted_rating)
        # The reference value is read from the model's own data matrix,
        # not from the record.
        actual_rating = svd.get_matrix().value(item, user)
        error = abs(predicted_rating - actual_rating)
        errors.append(error)
    except KeyError:
        continue

# NOTE(review): both metrics receive absolute errors; confirm that
# RootMeanSquareError.compute_list expects that input.
mean_absolute_error = MeanAbsoluteError.compute_list(errors)
root_mean_square_error = RootMeanSquareError.compute_list(errors)
print('Mean Absolute error: %f' % mean_absolute_error)
print('Root mean square error: %f' % root_mean_square_error)
Esempio n. 12
0
from recsys.algorithm.factorize import SVD
from recsys.datamodel.data import Data

# Four toy ratings. Each tuple is presumably (value, row id, column id)
# -- TODO confirm against the Data.set documentation.
data = [
    (4.0, 'user1', 'item1'),
    (2.0, 'user1', 'item3'),
    (1.0, 'user2', 'item1'),
    (5.0, 'user2', 'item4'),
]

# Wrap the tuples in a Data container and hand it to a fresh SVD model.
d = Data()
d.set(data)
svd = SVD()
svd.set_data(d)
m = svd.get_matrix()

# k=2 factorization, then one similarity query and one prediction.
svd.compute(k=2)
print(svd.similar('user1'))
print(svd.predict('user1', 'item1'))
Esempio n. 13
0
#svd.compute(k=K, pre_normalize=None, mean_center=True, post_normalize=True)

# --- Similarity queries ---
print('')
print('COMPUTING SIMILARITY')
print(svd.similarity(1, 2))  # similarity between items 1 and 2
print(svd.similar(1, 5))  # show 5 items similar to item 1

# --- Single prediction next to the stored value ---
print('')
print('GENERATING PREDICTION')
MIN_RATING = 0.0
MAX_RATING = 5.0
ITEMID = 1
USERID = 1
# predicted rating value
print(svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING))
# real rating value
print(svd.get_matrix().value(ITEMID, USERID))

# --- Recommendation for the user id ---
print('')
print('GENERATING RECOMMENDATION')
print(svd.recommend(USERID, n=5, only_unknowns=True, is_row=False))

# Metric objects for the prediction-based evaluation that follows.
rmse = RMSE()
mae = MAE()
spearman = SpearmanRho()
kendall = KendallTau()
#decision = PrecisionRecallF1()
for rating, item_id, user_id in test.get():
    try:
        pred_rating = svd.predict(item_id, user_id)
        rmse.add(rating, pred_rating)