def main():
    """Print user-based and item-based movie suggestions from MovieLens data.

    Loads the ratings found under ``<cwd>/ml-100k``, prints the first five and
    the last five recommendations for the substitute user (``sub_id``), then
    prints the first five entries of the item-similarity table for
    ``favorite_movie`` and for ``least_favorite_movie``.

    Relies on the module-level names ``r``, ``os``, ``sub_id``, ``get_user``,
    ``favorite_movie`` and ``least_favorite_movie``.
    """
    # Build the preference dictionary using recommendations.py.
    prefs = r.loadMovieLens(os.getcwd() + "/ml-100k")

    # All recommendations for the substitute user; index 1 of each entry is
    # what gets printed (presumably the movie title -- confirm against
    # recommendations.py).
    movie_recommendations = r.getRecommendations(prefs, str(sub_id))

    print("\nThe most recommended movies for " + get_user(sub_id) +
          " (which is the substitute) are:")
    # Slicing instead of indexing 0..4 tolerates fewer than five entries.
    for entry in movie_recommendations[:5]:
        print(entry[1])

    print("\nThe least recommended movies for the substitute are: ")
    # Walk the tail backwards so the very last entry is printed first.
    for entry in reversed(movie_recommendations[-5:]):
        print(entry[1])

    # Item-to-item similarity table for every movie.
    similar_items = r.calculateSimilarItems(prefs)

    print("\nFive similar movies to my favorite movie " + favorite_movie +
          " are:")
    for mov in similar_items[favorite_movie][0:5]:
        print(mov[1] + " " + str(mov[0]))

    print("\nFive similar movies to my least favorite movie " +
          least_favorite_movie + " are:")
    for mov in similar_items[least_favorite_movie][0:5]:
        print(mov[1] + " " + str(mov[0]))
def buildSimMatrix(prefsx):
    """Return the item-similarity table computed from ``prefsx``.

    Delegates to ``recommendations.calculateSimilarItems`` with ``n=10``.

    The argument is now actually used; previously the body read a global
    named ``prefs`` and ignored its parameter. The CSV export that used to
    follow the ``return`` statement could never execute and has been dropped,
    so this function still performs no file I/O.
    """
    return recommendations.calculateSimilarItems(prefsx, n=10)
def pelicula(request):
    """Handle the movie-recommendation form.

    On a valid POST, builds a ratings dictionary from every ``Usuario`` and
    its ``Puntuacion`` rows, asks ``recommendations.calculateSimilarItems``
    for up to three neighbours per movie and hands the neighbour titles of
    the requested movie to ``lista_peliculas``. On GET, or on an invalid
    POST, renders ``peliculaForm.html`` with the form.
    """
    if request.method == 'POST':
        formulario = peliculaForm(request.POST)
        if formulario.is_valid():
            # Fetch the movie we want recommendations for.
            peli = Pelicula.objects.get(id=formulario.cleaned_data['idpeli'])

            # Collect all ratings in the layout of "critics" in
            # recommendations.py: {user id: {movie title: rating}}.
            puntuaciones = {}
            for user in Usuario.objects.all():
                pelis = {}
                for puntuacion in Puntuacion.objects.filter(usuario=user):
                    pelis[puntuacion.pelicula.titulo] = puntuacion.puntuacion
                puntuaciones[user.id] = pelis

            # With the data in that layout, compute the 3 most similar items.
            similar = recommendations.calculateSimilarItems(puntuaciones, 3)

            # Pick the entries of the requested movie. A title missing from
            # the table, or one with fewer than three neighbours, yields a
            # shorter (possibly empty) list instead of KeyError / IndexError.
            res = similar.get(peli.titulo, [])
            recomen = [entry[1] for entry in res[:3]]
            return lista_peliculas(request, peli, recomen)
    else:
        formulario = peliculaForm()
    return render_to_response('peliculaForm.html',
                              {'formulario': formulario},
                              context_instance=RequestContext(request))
def recommendByItems(self, event):
    """Compute item-based recommendations and display them.

    Does nothing when ``self.isValid()`` is false. Otherwise stores the first
    ``self.maxNum`` results in ``self.resultList`` and calls
    ``self.showResult()``. ``event`` is not used.
    """
    # Make sure the input box holds a valid value.
    if self.isValid():
        similar_items = recommendations.calculateSimilarItems(self.prefs, n=50)
        ranked = recommendations.getRecommendedItems(
            self.prefs, similar_items, self.userId)
        self.resultList = ranked[:self.maxNum]
        self.showResult()
def recommendByItems(self, event):
    """Compute item-based recommendations and display them.

    Does nothing when ``self.isValid()`` is false. Otherwise stores the first
    ``self.maxNum`` results in ``self.resultList`` and calls
    ``self.showResult()``. ``event`` is not used.
    """
    # Make sure the input box holds a valid value.
    if self.isValid():
        similar_items = recommendations.calculateSimilarItems(self.prefs, n=50)
        ranked = recommendations.getRecommendedItems(
            self.prefs, similar_items, self.userId)
        self.resultList = ranked[:self.maxNum]
        self.showResult()
def test_getRecommendedItems(self):
    """Item-based recommendations for 'Toby' equal the reference ranking."""
    critics = recommendations.critics
    expected = [
        (3.182634730538922, 'The Night Listener'),
        (2.5983318700614575, 'Just My Luck'),
        (2.4730878186968837, 'Lady in the Water'),
    ]
    item_sim = recommendations.calculateSimilarItems(critics)
    actual = recommendations.getRecommendedItems(critics, item_sim, 'Toby')
    self.assertEqual(actual, expected)
def test_calculateSimilarItems(self):
    """The similarity list for 'Lady in the Water' equals the reference."""
    expected = [
        (0.4, 'You, Me and Dupree'),
        (0.2857142857142857, 'The Night Listener'),
        (0.2222222222222222, 'Snakes on a Plane'),
        (0.2222222222222222, 'Just My Luck'),
        (0.09090909090909091, 'Superman Returns'),
    ]
    table = recommendations.calculateSimilarItems(recommendations.critics)
    self.assertEqual(table['Lady in the Water'], expected)
def find_corerelated_films(data):
    """Print similar and dissimilar films for a liked and a disliked title.

    ``data`` is passed unchanged to ``recommendations.calculateSimilarItems``
    and ``recommendations.calculateNotSimilarItems``. For each of the two
    hard-coded titles the "matching" line shows the entry from the first
    table and the "Not matching" line shows the entry from the second one.

    Earlier revisions labelled three of the four lines with the wrong
    heading and emitted a stray debug line; both are fixed here.
    """
    favorite_film = "Schindler's List (1993)"
    least_film = 'Spice World (1997)'

    similar = recommendations.calculateSimilarItems(data)
    dissimilar = recommendations.calculateNotSimilarItems(data)

    for film in (favorite_film, least_film):
        print(f'\n Films matching {film} are: ', similar[film], '\n')
        print(f'\n Films Not matching {film} are: ', dissimilar[film], '\n')
def calculate_similar_items():
    """Rebuild the ``ItemSimilarity`` table from the current user ratings.

    Deletes every existing ``ItemSimilarity`` row, then saves one row per
    (item, similar item, score) produced by ``calculateSimilarItems``.
    """
    from recommendations import calculateSimilarItems

    ItemSimilarity.objects.all().delete()
    ratings = User.get_users_ratings_dict(lazy_evaluation=False)

    for item, neighbours in calculateSimilarItems(ratings).items():
        for score, similar_item in neighbours:
            # Model instances are built inline, so nothing is left bound
            # between iterations.
            ItemSimilarity(
                product=Product(id=item),
                similar_product=Product(id=similar_item),
                score=score,
            ).save()
def index():
    """Store the user data and its item-similarity table in Redis as JSON.

    Returns ``'error'`` as soon as a ``set`` call reports a falsy result,
    otherwise ``'ok'``.
    """
    data = dataCollation().userData()
    similarItems = recommendations.calculateSimilarItems(data)

    # Serialise both payloads before opening the connection.
    payloads = (
        ('recommendUserDate', json.dumps(data)),
        ('similarItems', json.dumps(similarItems)),
    )

    redis_con = dbConnect.redisCon()
    for key, payload in payloads:
        if not redis_con.set(key, payload):
            return 'error'
    return 'ok'
def main():
    """Print every similarity entry recorded for "Top Gun (1986)".

    Loads MovieLens via ``recommendations.loadMovieLens()``, builds the item
    similarity table with ``n=80`` and prints one "value title" line per
    entry of the Top Gun row. Nothing is listed when the title is absent.

    The row is fetched with a direct lookup; sorting the whole table only to
    locate a single key was unnecessary. The ``print(...)`` calls take one
    argument each so they produce the same text on Python 2 and Python 3.
    """
    target = "Top Gun (1986)"
    prefs = recommendations.loadMovieLens()
    similar_items = recommendations.calculateSimilarItems(prefs, n=80)

    print("*" * 60)
    print("Movies received ratings most like or least like Top Gun")
    print("*" * 60)
    # Concatenated on purpose: the Python 2 print statement inserted no
    # space after an item ending in a tab.
    print("value\t\t\t\t\t" + "Movie Name")
    print("-" * 60)

    for score, movie in similar_items.get(target, []):
        print("%s %s" % (score, movie))
def main():
    """Print every similarity entry recorded for 'Top Gun (1986)'.

    Loads MovieLens via ``recommendations.loadMovieLens()``, builds the item
    similarity table with ``n=80`` and prints one "value title" line per
    entry of the Top Gun row. Nothing is listed when the title is absent.

    The row is fetched with a direct lookup; sorting the whole table only to
    locate a single key was unnecessary. The ``print(...)`` calls take one
    argument each so they produce the same text on Python 2 and Python 3.
    """
    target = 'Top Gun (1986)'
    prefs = recommendations.loadMovieLens()
    similar_items = recommendations.calculateSimilarItems(prefs, n=80)

    print("*" * 60)
    print("Movies received ratings most like or least like Top Gun")
    print("*" * 60)
    # Concatenated on purpose: the Python 2 print statement inserted no
    # space after an item ending in a tab.
    print("value\t\t\t\t\t" + "Movie Name")
    print("-" * 60)

    for score, movie in similar_items.get(target, []):
        print("%s %s" % (score, movie))
def testMovieLens():
    """Time user-based versus item-based recommendations for user '87'.

    Prints the user's ratings, the first 30 user-based recommendations with
    the elapsed time, then (after building the item-similarity table with
    ``n=50``) the first 30 item-based recommendations with their elapsed
    time. Relies on the module-level ``loadMovieLens`` and ``time``.
    """
    import recommendations

    # time.clock() was removed in Python 3.8; use perf_counter where it
    # exists and fall back to clock() only on interpreters that lack it.
    timer = getattr(time, "perf_counter", None) or time.clock

    prefs = loadMovieLens()
    userId = '87'
    print(prefs[userId])

    start = timer()
    print(recommendations.getRecommendations(prefs, userId)[0:30])
    print("Total Time:" + str(timer() - start))
    print("")

    print("Preprocessing...")
    # Built outside the timed region: only the lookup below is measured.
    itemMatches = recommendations.calculateSimilarItems(prefs, n=50)
    print("")

    start = timer()
    print(recommendations.getRecommendedItems(prefs, itemMatches, userId)[0:30])
    print("Total Time:" + str(timer() - start))
def buildSimMatrix(prefs):
    """Return the item-similarity table for ``prefs`` (computed with n=10)."""
    return recommendations.calculateSimilarItems(prefs, n=10)
    # Disabled CSV export of the table:
    # with open('data/sim.csv', 'wb') as f:
    #     c = csv.writer(f)
    #     for key, value in sim.items():
    #         c.writerow([key, value])


# Disabled loader for the exported table:
# def loadSimMatrix():
#     r = csv.reader(open('data/sim.csv', 'rb'))
#     mydict = dict(x for x in r)
#     return mydict

# Build movie data
# processMovie()

# Build similarity matrix
# prefs = recommendations.loadMovieLens()
# sim = buildSimMatrix(prefs)
# print sim

# Load
# loadSimMatrix()
#!/usr/local/bin/python
# Print the movies most or least like a given film.
#
# Command-line arguments, in order: the film title, how many movies to
# print, and the direction ('most' selects the head of the similarity list;
# any other value selects its tail).
import sys
import pprint

sys.path.insert(0, '../starter-code')
import recommendations

if __name__ == '__main__':
    film = sys.argv[1]
    threshold = int(sys.argv[2])
    direction = sys.argv[3]

    prefs = recommendations.loadMovieLens('../data')
    result = recommendations.calculateSimilarItems(prefs, n=1682)

    ranked = result[film]
    # A negative count would turn the slices below into "all but the last
    # k" selections, so clamp it to zero.
    count = max(threshold, 0)

    if direction == 'most':
        print("Movies most like '" + film + "': '")
        chosen = ranked[:count]
    else:
        print("Movies least like '" + film + "': '")
        # Reverse first so the last entry comes out first. The former
        # range(1, threshold) loop printed one movie fewer than requested.
        chosen = ranked[::-1][:count]

    # Slices never raise, so asking for more movies than exist prints what
    # is available instead of failing with IndexError.
    for entry in chosen:
        print(entry[1] + ' (' + str(entry[0]) + ')')
# Valentina Neblitt-Jones
# CS 595 Introduction to Web Science
# Fall 2013
# Assignment #8 Question #5
# What movie received ratings most like Top Gun? Which movie received ratings
# that were least like Top Gun (negative correlation)?

import sys
import pprint

sys.path.insert(0, '/Users/vneblitt/Documents/cs595-f13/assignment08/library')
import recommendations

prefs = recommendations.loadMovieLens(path='/Users/vneblitt/Documents/cs595-f13/assignment08/dataset')
answer = recommendations.calculateSimilarItems(prefs,n=1664)

# Show only the Top Gun row on screen.
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(answer['Top Gun (1986)'])

# Dump the complete table to disk. The file is opened only once the data is
# ready, so a failure above no longer truncates an existing topgun.txt, and
# the with-block closes the handle even if the write raises.
with open('topgun.txt', 'w') as g:
    g.write(str(answer))
import recommendations as rec

prefs = rec.loadMovieLens("../data/ml-100k")


def getTopAndBottomCorrelated(movie_info, movie):
    """Return the first five and the last five entries listed for ``movie``.

    ``bottom`` is reversed, so the very last entry of the list comes first.
    Lists shorter than five are returned whole in both halves.
    """
    m = movie_info[movie]
    top = m[:5]
    # m[-5:] rather than m[len(m) - 5:]: for fewer than five entries the old
    # start index went negative and silently dropped items.
    bottom = m[-5:]
    bottom.reverse()
    return top, bottom


movies_info = rec.calculateSimilarItems(prefs, n=2000, best=True)
best_movie = "Stargate (1994)"
worst_movie = "Batman Forever (1995)"

# NOTE(review): within this view the handle is never closed, the longtable
# environment is never ended, and `bottom` / `worst_movie` are unused --
# presumably the script continues beyond what is visible here; confirm.
table = open("../docs/q4_table.tex", "w")
table.write("My favorite film from the data is: " + best_movie + "\n\n")

top, bottom = getTopAndBottomCorrelated(movies_info, best_movie)

table.write("The top 5 most correlated are:\n\n")
table.write("\\begin{longtable}{| c | c | c |}\n")
table.write("\\hline\n")
table.write("Position & Movie & Similarity\\\\\n")

# One table row per entry: 1-based position, entry[1], entry[0] to 3 places.
count = 1
for l in top:
    table.write("\\hline\n {} & {} & {:.3f}\\\\\n".format(count, l[1], l[0]))
    count += 1

table.write("\\hline\n")
table.write("\\caption{Top 5 correlated to favorite movie}\n")
import recommendations
import csv

prefs = recommendations.loadMovieLens()
similarities = recommendations.calculateSimilarItems(prefs)

# Look both titles up before touching the disk, so a missing title fails
# before any file is created.
good_movie = similarities['Reservoir Dogs (1992)']
not_for_me = similarities['Winnie the Pooh and the Blustery Day (1968)']

# Each CSV gets a header row followed by the first five entries of its list.
for path, rows in (('csv_files/q4_top_rec.csv', good_movie),
                   ('csv_files/q4_bad_rec.csv', not_for_me)):
    with open(path, 'w') as output:
        csv_out = csv.writer(output)
        csv_out.writerow(['correlation', 'movie title'])
        csv_out.writerows(rows[0:5])
from recommendations import critics
import recommendations

# Walk-through of the recommendations module. Every print(...) takes a
# single argument, so the output is the same on Python 2 and Python 3.
RULE = "-----------"

print(critics['Lisa Rose']['Lady in the Water'])
# This changes the shared critics dictionary in place.
critics['Toby']['Snakes on a Plane'] = 4.5
print(critics['Toby'])
print(critics)
print(RULE)

# User-based recommendations on the sample data.
print(recommendations.getRecommendations(critics, 'Toby'))
print(RULE)

# Item view of the same data.
movies = recommendations.transformPrefs(critics)
print(recommendations.topMatches(movies, 'Superman Returns'))
print(RULE)

itemsim = recommendations.calculateSimilarItems(critics)
print(itemsim)
print(RULE)

print(recommendations.getRecommendedItems(critics, itemsim, 'Toby'))
print(RULE)

# MovieLens data set.
prefs = recommendations.loadMovieLens()
print(prefs['1'])
print(RULE)

print(recommendations.getRecommendations(prefs, '42')[31:60])
print(RULE)

##itemsim = recommendations.calculateSimilarItems(prefs, n =50)
##print itemsim
print(RULE)

##print recommendations.getRecommendedItems(prefs,itemsim, '87')[0:30]
print(RULE)

print(recommendations.getKNNRecommendations(prefs, '42', 30)[31:60])
import recommendations
import time


def loadMovieLens(path='/Users/yupeng/Downloads/ml-100K'):
    """Load MovieLens ratings as ``{user: {movie title: rating}}``.

    Reads ``u.item`` (pipe-separated; the first two fields are the movie id
    and title) and ``u.data`` (tab-separated user, movie id, rating,
    timestamp) from ``path``. Ratings are stored as floats.

    Both files are opened in ``with`` blocks so their handles are closed
    deterministically instead of being left to the garbage collector.
    """
    movies = {}
    with open(path + '/u.item') as item_file:
        for line in item_file:
            movie_id, title = line.split('|')[0:2]
            movies[movie_id] = title

    prefs = {}
    with open(path + '/u.data') as data_file:
        for line in data_file:
            # The timestamp field is read but not used.
            user, movie_id, rating, _timestamp = line.split("\t")
            prefs.setdefault(user, {})[movies[movie_id]] = float(rating)
    return prefs


prefs = loadMovieLens()

# User-based recommendations, timed.
start = time.time()
result = recommendations.getRecommendations(prefs, '87')[0:30]
print(result)
print(time.time() - start)

# Item-based recommendations; the timing includes building the table.
start = time.time()
itemsim = recommendations.calculateSimilarItems(prefs, n=50)
print(itemsim)
print(recommendations.getRecommendedItems(prefs, itemsim, '87')[0:30])
print(time.time() - start)
import recommendations as rec

# 2.4.1.1
# Transformation of the critics matrix to
# {"movie": {"Person1": "similarity", ...}}.
transCritics = rec.transposeMatrix(rec.critics)

# 2.4.1.2
# NOTE(review): this assignment appeared to be commented out although
# section 2.4.1.3 reads `similar_items_euclidean`, which would fail with
# NameError. It is restored as written; confirm that rec.sim_euclid_normed
# exists.
similar_items_euclidean = rec.calculateSimilarItems(transCritics, rec.sim_euclid_normed)
similar_items_pearson = rec.calculateSimilarItems(transCritics, rec.sim_pearson)
print(rec.topMatches(transCritics, 'Lady in the Water', rec.sim_euclid))

# 2.4.1.3
print("Recommended Movies (euclidean): " + str(rec.getRecommendedItems(rec.critics, 'Toby Segaran', similar_items_euclidean)))
print("Recommended Movies (pearson): " + str(rec.getRecommendedItems(rec.critics, 'Toby Segaran', similar_items_pearson)))
# Fill rating_critics as {user id: {joke id: rescaled rating}} from the rows
# of rating_list (both names are defined earlier in the script).
for line in rating_list:
    userid = int(line[0])
    jokeid = int(line[1])
    rating = round(float(line[2]))/5+3
    # setdefault replaces the has_key()/else branch: dict.has_key does not
    # exist on Python 3, and this form needs a single lookup.
    rating_critics.setdefault(userid, {})[jokeid] = rating
print(rating_critics)

# Build the Item Comparison Dataset for item-based recommendation
similarjokes = recommendations.calculateSimilarItems(rating_critics, n=10)
print(similarjokes)

# Read joke data from jester_ratings.dat
# with open('jester_items.dat','rU') as joke_data:
#     joke_set = dict()
#     joke_list = joke_data.read().split('\n\n')
#     for joke in joke_list[0:150]:
#         joke = joke.replace('\n','')
#         result = re.search(r'(\d*?):(.*)', joke)
#         joke_set[int(result.group(1))] = result.group(2)
#
# print joke_set
# Demo of the recommendations module for the user 'ming'. Every print(...)
# takes a single argument, so the output is the same on Python 2 and 3.

# Pairwise similarity of 'ming' to three other users, per metric.
for heading, metric in (("distance", recommendations.sim_distance),
                        ("pearson", recommendations.sim_pearson)):
    print(heading)
    for other in ('lin', 'michael', 'mick'):
        print(metric(recommendations.critics, 'ming', other))

print("top match")
print(recommendations.topMatches(recommendations.critics, 'ming', n=3))

print("recommendation")
print(recommendations.getRecommendations(recommendations.critics, 'ming'))

# Same calls on the item view of the data.
print("recommend items")
items = recommendations.transformPrefs(recommendations.critics)
print(recommendations.topMatches(items, 'lady in the water', n=3))
print(recommendations.sim_pearson(items, 'lady in the water', 'snake on a plane'))

print("item similarity use distance")
print(recommendations.calculateSimilarItems(recommendations.critics))

print('item similarity use pearson')
print(recommendations.calculateSimilarItems(
    recommendations.critics, similarity=recommendations.sim_pearson))

print('recommendation base on items')
itemsim = recommendations.calculateSimilarItems(recommendations.critics)
print(recommendations.getRecommendedItems(recommendations.critics, itemsim, 'ming'))
def setUp(self):
    """Compute the item-similarity table for ``critics`` before each test."""
    similar_items = recommendations.calculateSimilarItems(critics)
    self.item_sim = similar_items
import recommendations as rec

# The intermediate result used to be bound to the name `dict`, which hid the
# builtin type for the rest of the module; it is now called `matches`.
# Every print(...) takes one argument, so it behaves the same on Python 2
# and Python 3.
matches = rec.topMatches(rec.critics, person='Toby', similarity='sim_euclid')
print("euklid")
print(matches)
print("_" * 80)

matches = rec.topMatches(rec.critics, person='Toby', similarity='sim_pearson')
print("pearson")
print(matches)

recommendations = rec.getRecommendations(rec.critics, 'Toby', 'sim_pearson')
print("Rec")
print(recommendations)

print("UCF")
print(rec.critics)

transCritics = rec.transformCritics(rec.critics, 'sim_euclid')
print("ICF")
print(transCritics)

#print rec.topMatches(transCritics, 'Lady in the Water', 'sim_pearson')
print(rec.calculateSimilarItems(transCritics, 'sim_pearson'))
#print rec.topMatches(transCritics, 'Snakes on a Plane', 'sim_pearson')
# Demo of item-based filtering on the sample critics and on MovieLens.
# Every print(...) takes one argument, so it behaves the same on Python 2
# and Python 3.
movies = recommendations.transformPrefs(recommendations.critics)

# Heading: "swap people and items".
print('调换人与物品:')
print(movies)

# Heading: "print movies of a similar kind".
print('打印相似类型的电影:')
print(recommendations.topMatches(movies, 'Superman Returns'))
print(recommendations.getRecommendations(movies, 'Just My Luck'))

# Disabled del.icio.us experiment:
# import pydelicious
# print pydelicious.get_popular(tag='programming')
# print 'Start filling data:'
# from deliciousrec import *
# delusers = initializeUserDict('programming')
# delusers['tsegaran'] ={}
# fillItems(delusers)
# print delusers

itemSim = recommendations.calculateSimilarItems(recommendations.critics)
print(itemSim)

# Heading: "item-based recommendation".
print('基于物品推荐:')
print(recommendations.getRecommendedItems(recommendations.critics, itemSim, 'Toby'))

# Heading: "movie data".
print('电影数据:')
prefs = recommendations.loadMovieLens()
print(prefs['87'])
print(recommendations.getRecommendations(prefs, '87')[0:30])

itemSim2 = recommendations.calculateSimilarItems(prefs, n=50)
print(recommendations.getRecommendedItems(prefs, itemSim2, '87')[0:30])
import recommendations as rec

pref = rec.loadMovieLens()
top = 'Toy Story (1995)'
bot = 'Braveheart (1995)'

# NOTE(review): the leading 1 / 0 argument presumably selects best versus
# worst matches in this project's calculateSimilarItems -- confirm there.
topmov = rec.calculateSimilarItems(1, pref, 5)
botmov = rec.calculateSimilarItems(0, pref, 5)

HEADER_RULE = '---------------------------------------------------------------'
FOOTER_RULE = "--------------------------------------------------------"

# One section per (heading, table, title). Building every heading from the
# same pieces also restores the space that was missing after "for" in the
# last section. Every print(...) takes one argument, so it behaves the same
# on Python 2 and Python 3.
for label, table, title in (('Best', topmov, top),
                            ('Least', botmov, top),
                            ('Best', topmov, bot),
                            ('Least', botmov, bot)):
    print(label + ' Recommended movies for ' + title + ':')
    print(HEADER_RULE)
    for movie in table[title]:
        print(movie[1])
    print(FOOTER_RULE)
def test_getRecommendedItems(self):
    """Item-based recommendations for 'Toby' equal the reference ranking."""
    critics = recommendations.critics
    expected = [
        (3.182634730538922, 'The Night Listener'),
        (2.5983318700614575, 'Just My Luck'),
        (2.4730878186968837, 'Lady in the Water'),
    ]
    item_sim = recommendations.calculateSimilarItems(critics)
    actual = recommendations.getRecommendedItems(critics, item_sim, 'Toby')
    self.assertEqual(actual, expected)
def test_calculateSimilarItems(self):
    """The similarity list for 'Lady in the Water' equals the reference."""
    expected = [
        (0.4, 'You, Me and Dupree'),
        (0.2857142857142857, 'The Night Listener'),
        (0.2222222222222222, 'Snakes on a Plane'),
        (0.2222222222222222, 'Just My Luck'),
        (0.09090909090909091, 'Superman Returns'),
    ]
    table = recommendations.calculateSimilarItems(recommendations.critics)
    self.assertEqual(table['Lady in the Water'], expected)
# Every print(...) below takes one argument, so it behaves the same on
# Python 2 and Python 3.
print(sim_distance(critics, 'Lisa Rose', 'Gene Seymour'))

print('Pearson Correlation Score of Lisa Rose and Gene Seymour is ')
print(sim_pearson(critics, 'Lisa Rose', 'Gene Seymour'))

print('TopMatches 3 for Toby is ')
print(topMatches(critics, 'Toby', n=3))

# User-Based CF
# Heading: ranking films for Toby by similarity-weighted scores of others.
print('通过按人群与 Toby 相似度,加权重新评分,为影片排名获得推荐: ')
print(getRecommendations(critics, 'Toby'))

# Heading: similarity from who liked Superman Returns and what else they
# liked.
print('通过查看哪些人喜欢 Superman Returns,以及这些人喜欢哪些其他物品来确定相似度:')
movies = transformPrefs(critics)
print(topMatches(movies, 'Superman Returns'))

# Heading: people most likely to enjoy Just My Luck (swapping people and
# items does not always give useful information).
print('可能最喜欢 Just My Luck 的人群列表(对调人和物不一定能获得有用信息):')
print(getRecommendations(movies, 'Just My Luck'))

# Item-Based CF
# Heading: building the item comparison data set.
print('构造物品比较数据集:')
itemsim = calculateSimilarItems(critics)
print(itemsim)

# Heading: item-based recommendation list for Toby.
print('基于物品的推荐为 Toby 提供推荐列表:')
print(getRecommendedItems(critics, itemsim, 'Toby'))
def testBasics(self):
    """A user is recommended exactly the one item they have not rated.

    'N' has rated 'p' and 'j' but not 'r', so the only recommendation
    expected for 'N' is 'r'.
    """
    d = {
        'N': {'p': 1.0, 'j': 0.3},
        'Y': {'p': 0.8, 'j': 0.2, 'r': 1.0},
    }
    itemsim = recommendations.calculateSimilarItems(d)
    r = recommendations.getRecommendedItems(d, itemsim, 'N')
    # assertEqual instead of the deprecated alias assertEquals, which was
    # removed in Python 3.12.
    self.assertEqual(1, len(r))
    self.assertEqual('r', r[0][1])
# Movie recommendations for Toby using the Euclidean-distance similarity.
toby_by_distance = recommendations.getRecommendations(
    recommendations.critics, 'Toby', similarity=recommendations.sim_distance)
print(toby_by_distance)

# Transpose the matrix from users-by-movies to movies-by-users.
movies = recommendations.transformPrefs(recommendations.critics)

# Movies similar to 'Superman Returns'.
similar_to_superman = recommendations.topMatches(movies, 'Superman Returns')
print(similar_to_superman)

# Similarity matrix of the items.
itemsim = recommendations.calculateSimilarItems(recommendations.critics)
print(itemsim)

# Recommendations for Toby based on that similarity matrix.
toby_by_items = recommendations.getRecommendedItems(
    recommendations.critics, itemsim, 'Toby')
print(toby_by_items)

# print(recommendations.sim_pearson(critics,'Lisa Rose','Gene Seymour'))
# print(recommendations.getRecommendations(recommendations.critics,'Toby'))
# delusers=deliciousrec.initializeUserDict('programming')
# itemsim=recommendations.calculateSimilarItems(recommendations.critics)
#
import recommendations
import datetime

prefers = recommendations.loadMovieLens()

# First measurement: building the item-similarity table plus one lookup.
startTime = datetime.datetime.now()
itemMatch = recommendations.calculateSimilarItems(prefers, n=50)
first_thirty = recommendations.getRecommendedItems(prefers, itemMatch, "87")[:30]
print(first_thirty)
elapsed = datetime.datetime.now() - startTime
print(elapsed.total_seconds())

# Second measurement: a lookup only, reusing the table built above.
startTime = datetime.datetime.now()
first_thirty = recommendations.getRecommendedItems(prefers, itemMatch, "88")[:30]
print(first_thirty)
elapsed = datetime.datetime.now() - startTime
print(elapsed.total_seconds())
# Transcript of an interactive session (Python 2 syntax). Apart from the
# first statement, results are bare expressions: they are evaluated and
# discarded when this runs as a script.

# Top three matches for Toby using the distance similarity.
print recommendations.topMatches(recommendations.critics,'Toby',n=3,similarity=recommendations.sim_distance)

# User-based recommendations with the default and the distance similarity.
recommendations.getRecommendations(recommendations.critics,'Toby')
recommendations.getRecommendations(recommendations.critics,'Toby',similarity=recommendations.sim_distance)

# Same calls on the transformed (item-keyed) data.
movies=recommendations.transformPrefs(recommendations.critics)
#print movies
recommendations.topMatches(movies,'Superman Returns')
recommendations.getRecommendations(movies,'Just My Luck')
recommendations.getRecommendations(movies,'Lady in the Water')

# Item-based recommendations on the sample data.
itemsim=recommendations.calculateSimilarItems(recommendations.critics,n=8)
itemsim
recommendations.getRecommendedItems(recommendations.critics,itemsim,'Toby')

# The same workflow on the MovieLens data for user '87'.
prefs=recommendations.loadMovieLens()
prefs['87']
recommendations.getRecommendations(prefs,'87')[0:30]
itemsim1=recommendations.calculateSimilarItems(prefs,n=50)
recommendations.getRecommendedItems(prefs,itemsim1,'87')[0:30]