def testDelicious():
    """Smoke-test the recommender on del.icio.us data for the 'programming' tag.

    Picks one user at random, prints that user's top matches and first ten
    recommendations (both with ``simEuclidean``), then prints the entries
    most similar to the first recommended URL.
    """
    import recommendations

    delusers = initializeUserDict('programming')
    fillItems(delusers)

    # Python 2: keys() is a list, so it can be indexed with a random position.
    candidates = delusers.keys()
    user = candidates[random.randint(0, len(delusers) - 1)]
    print(user)

    neighbours = recommendations.topMatches(
        delusers, user, similarity=recommendations.simEuclidean)
    print(neighbours)

    ranked = recommendations.getRecommendations(
        delusers, user, similarity=recommendations.simEuclidean)
    recUrls = ranked[0:10]
    print(recUrls)

    # The code reads element [1] of the first recommendation as the URL.
    url = recUrls[0][1]
    print(url)

    by_url = recommendations.transformPrefs(delusers)
    print(recommendations.topMatches(by_url, url))
def run_assignment_2_2(): print "-"*50, "\n", "assignment 2.2: Calculate similarites\n", "-"*50 for testperson in critics.iterkeys(): print "Euclidean Distances for", testperson tMatch = reco.topMatches(critics, testperson, reco.sim_euclid) for m in tMatch : print " ", m[0], m[1] print "-" *20 print "Pearson Distances for", testperson tMatch = reco.topMatches(critics, testperson, reco.sim_pearson) for m in tMatch : print " ", m[0], m[1] print "-"*50
def buildSimilarityDict(pref, similarity=sim_distance):
    """Map every key of the transformed preferences to its 5 top matches.

    ``pref`` is passed through ``transform``; each key of the result is then
    handed to ``topMatches`` together with ``similarity``.
    """
    by_item = transform(pref)
    return dict(
        (key, topMatches(by_item, key, 5, similarity)) for key in by_item
    )
def testBasics(self):
    """topMatches must order the other people by the stubbed similarity score."""
    scores = {'Yann': 3, 'Kerstin': 2, 'Josh': 1}

    def stubDistance(prefs, p1, p2):
        # The similarity callback must receive the same preference data.
        self.assertEqual(self.data, prefs)
        # Score by whichever participant is not 'Nico'.
        other = p2 if p1 == 'Nico' else p1
        return scores[other]

    m = recommendations.topMatches(self.data, 'Nico', similarity=stubDistance)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual([(3, 'Yann'), (2, 'Kerstin'), (1, 'Josh')], m)
def testSanity(self):
    """Compare myRec.topMatches against the reference implementation for Toby.

    Checks that three matches come back and that the summed scores of both
    implementations agree.
    """
    reference = recommendations.topMatches(recommendations.critics, "Toby", n=3)
    candidate = myRec.topMatches(myRec.critics, "Toby", n=3)

    self.assertEqual(len(candidate), 3)

    reference_total = sum([score for score, _ in reference])
    candidate_total = sum([score for score, _ in candidate])
    self.assertEqual(reference_total, candidate_total)
def get_similar_users(self, n=-1, similarity='pearson'):
    """Return ``(correlation, User)`` pairs for the users ranked closest to this one.

    ``similarity`` is resolved through ``get_similarity``; ``n`` is forwarded
    unchanged to ``topMatches``.
    """
    from recommendations import topMatches, get_similarity

    users_prefs = User.get_users_ratings_dict(lazy_evaluation=False)
    sim = get_similarity(similarity)
    ranked = topMatches(users_prefs, self.id, n=n, similarity=sim)

    # Swap each matched user id for the corresponding User object.
    pairs = []
    for correlation, userid in ranked:
        pairs.append((correlation, User.objects.get(id=userid)))
    return pairs
def test_topMatches(self):
    """Toby's three best matches equal the reference values after rounding."""
    expected = [
        (0.99124070716192991, 'Lisa Rose'),
        (0.92447345164190486, 'Mick LaSalle'),
        (0.89340514744156474, 'Claudia Puig'),
    ]
    actual = rec.topMatches(rec.critics, 'Toby', n=3)

    self.assertIsInstance(actual, list)
    # Both sides go through round_sequence before being compared.
    rounded_actual = round_sequence(actual)
    rounded_expected = round_sequence(expected)
    self.assertEqual(rounded_actual, rounded_expected)
def calculateSimilarItems(prefs, n):
    """Return ``{item: topMatches(...)}`` for every item of the transformed prefs.

    The preferences are converted with ``transform`` and every resulting key
    is scored with ``sim_distance``, keeping ``n`` matches per item.
    """
    by_item = transform(prefs)
    return dict(
        (item, topMatches(by_item, item, n=n, similarity=sim_distance))
        for item in by_item
    )
def test_topMatches_movies(self):
    """Movies most similar to 'Superman Returns' match the reference list."""
    expected = [
        (0.657, 'You, Me and Dupree'),
        (0.487, 'Lady in the Water'),
        (0.111, 'Snakes on a Plane'),
        (-0.179, 'The Night Listener'),
        (-0.422, 'Just My Luck'),
    ]
    by_movie = rec.transformPrefs(rec.critics)
    raw_matches = rec.topMatches(by_movie, 'Superman Returns')
    # Normalised through round_sequence(…, 3, trunc_n) before comparing.
    matches = round_sequence(raw_matches, 3, trunc_n)
    self.assertEqual(matches, expected)
def calculateSimilarItems(perfs, n=10):
    """Build ``{item: topMatches(...)}`` over the transformed preferences.

    Every item is scored with ``euclideanDist``; a progress line is printed
    after each 100 items.
    """
    itemPerfs = transformPerfs(perfs)
    similar = {}
    for position, item in enumerate(itemPerfs, 1):
        if position % 100 == 0:
            print("%d / %d" % (position, len(itemPerfs)))
        similar[item] = topMatches(
            itemPerfs, item, resultSize=n, recoFunc=euclideanDist)
    return similar
def fillsimilaritymatrix(self):
    """Store the 50 top matches of every url id in the ``urlsimilarity`` table.

    Url ids are read from ``wordlocation``; each match is printed/logged and
    inserted as ``(urlid1, urlid2, similarity)``.
    """
    dictObj = self.geturlwordcount()
    rows = self.con.execute(
        'select distinct urlid from wordlocation order by urlid')
    for row in rows:
        urlid = row[0]
        urllist = topMatches(dictObj, urlid, n=50)

        summary = repr(urlid) + ":\n" + repr(urllist)
        print(summary)
        self.log.debug(summary)

        for match in urllist:
            # match[0] is written as the similarity, match[1] as the second url id.
            self.con.execute(
                'insert into urlsimilarity(urlid1,urlid2,similarity) values(%s,%s,%f)'
                % (urlid, match[1], match[0]))
            self.log.debug(
                repr(urlid) + "," + repr(match[1]) + ',' + repr(match[0]) + '\n')
    # NOTE(review): commit assumed to run once, after all url ids are processed.
    self.dbcommit()
def main(): results = {} resultsOfloadMovieLens = recommendations.loadMovieLens() uniqueValues = resultsOfloadMovieLens.keys() length = len(resultsOfloadMovieLens) for key in range(0,length): resultsOftopMatches = recommendations.topMatches(resultsOfloadMovieLens,uniqueValues[key],n=length) for value in resultsOftopMatches: if value[1] == uniqueValues[key]: pass for key, value in sorted(results.items(), key=lambda e: e[1], reverse=True): print key, results[key]
def main(): results = {} resultsOfloadMovieLens = recommendations.loadMovieLens() uniqueValues = resultsOfloadMovieLens.keys() length = len(resultsOfloadMovieLens) for key in range(0, length): resultsOftopMatches = recommendations.topMatches( resultsOfloadMovieLens, uniqueValues[key], n=length) for value in resultsOftopMatches: if value[1] == uniqueValues[key]: pass for key, value in sorted(results.items(), key=lambda e: e[1], reverse=True): print key, results[key]
def main():
    """Interactive Last.fm recommender.

    When ``network`` is truthy: asks for a user and a friend count, gathers
    artist-play data for the user and friends, prints the closest matching
    users and the first 20 recommended artists, and optionally dumps the
    collected data dictionary.
    """
    print('Audioscrobbler/Last.fm Recommendations by Chris Coykendall ')
    print('------------------------------------------------------------')
    if network:
        # Get user to recommend to
        user = raw_input('Enter user to recommend to: ')
        # DEBUG user='******'
        numFriends = raw_input('Enter # of friends to collect listen data from: ')
        print('Fetching Last.fm API data for ' + user + '...')

        # Get the fan's friends
        friends = getFanFriends(user, int(numFriends))
        if friends is None:
            return
        print('Retrieved ' + str(len(friends)-1) + ' friends from ' + user + '...')

        # Get the fan and friends top artists based on relative number of artist plays
        print('Fetching most listened artists from user and their friends...')
        fanArtistData = getArtistPlaysDataSet(friends)

        # Use similarity to determine top 5 similar users based on artists played
        closestMatches = recommendations.topMatches(fanArtistData, user, 5)
        print('>>>>> Top 5 Similar Users out of ' + str(len(closestMatches)) + ' friends:')
        for match in closestMatches[:5]:
            # (score + 1) / 2 * 100 rescales the score to a percentage.
            print(match[1] + ' (' + str(round(((match[0]+1)/2)*100,1)) + '%)')
        print('<<<<<')

        # Determine top 20 recommended artists for user from friends listens
        recArtists = recommendations.getRecommendations(fanArtistData, user)
        print('>>>>> Top 20 Recommended Artists out of ' + str(len(recArtists)))
        for match in recArtists[:20]:
            print(match[1] + ' (' + str(round(match[0]*20,1)) + '%)')
        print('<<<<<')

        # Ask whether or not to display the data set
        if raw_input('Would you like to display the data dictionary (Y/N)?: ').upper() == 'Y':
            # A separate loop name keeps the entered ``user`` intact.
            for listener in fanArtistData:
                print(listener)
                for artist in fanArtistData[listener]:
                    print('\t' + artist + ': ' + str(round(fanArtistData[listener][artist],1)))
def getRatings(ratingsfile):
    """Print the 5 most and 5 least correlated users relative to user '33'.

    ``ratingsfile`` is iterated line by line; each line must hold four
    tab-separated fields: user id, item id, rating, timestamp.

    Fix: the original created an empty dict for a user on first sight and
    discarded that line's rating, so every user lost one rating.
    """
    itemsdict = {}
    for line in ratingsfile:
        user_id, item_id, rating, _timestamp = line.split('\t')
        # setdefault creates the per-user dict on demand and still stores the rating.
        itemsdict.setdefault(user_id, {})[item_id] = float(rating)

    # One slot per distinct user, as counted by the original loop.
    count = len(itemsdict)
    output = recommendations.topMatches(itemsdict, '33', n=count)

    top = output[:5]
    print('top 5 correlated users to user 33 are')
    print(top)

    bot = output[-5:]
    print('bottom 5 correlated users to user 33 are')
    print(bot)
import recommendations

# User-based recommendations for Toby: default similarity, then sim_distance.
print(recommendations.getRecommendations(recommendations.critics, 'Toby'))
print(recommendations.getRecommendations(
    recommendations.critics, 'Toby',
    similarity=recommendations.sim_distance))

# Flip the preference dict, then query it by movie title.
movies = recommendations.transformPrefs(recommendations.critics)
similar_to_superman = recommendations.topMatches(movies, 'Superman Returns')
print(similar_to_superman)
print(recommendations.getRecommendations(movies, 'Just My Luck'))

# Item-based recommendations from the precomputed similarity data.
itemsim = recommendations.calculateSimilarItems(recommendations.critics)
print(itemsim)
print(recommendations.getRecommendedItems(recommendations.critics, itemsim, 'Toby'))
# Dictionary of movie critics and their ratings of a small set of films.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0, 'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5, 'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5, 'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5, 'The Night Listener': 3.0,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5, 'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5, 'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5, 'Just My Luck': 3.0,
        'Superman Returns': 4.0, 'You, Me and Dupree': 2.5,
        'The Night Listener': 4.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0, 'Superman Returns': 3.0,
        'You, Me and Dupree': 2.0, 'The Night Listener': 3.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
        'Superman Returns': 5.0, 'You, Me and Dupree': 3.5,
        'The Night Listener': 3.0,
    },
    'Toby': {
        'Snakes on a Plane': 4.5, 'Superman Returns': 4.0,
        'You, Me and Dupree': 1.0,
    },
}

# NOTE(review): this first call relies on topMatches' default similarity while
# the label says sim_distance -- confirm the default in recommendations.py.
list_of_recommendations = recommendations.topMatches(critics, 'Toby', n=3)
print('when algorithm of similarity is sim_distance:')
print(list_of_recommendations)

list_of_recommendations = recommendations.topMatches(
    critics, 'Toby', n=3, similarity=recommendations.sim_pearson)
print('when algorithm of similarity is sim_pearson:')
print(list_of_recommendations)

person_getRecommendations = recommendations.getRecommendations(critics, 'Toby')
print('when algorithm of similarity is sim_distance:')
print(person_getRecommendations)

person_getRecommendations = recommendations.getRecommendations(
    critics, 'Toby', similarity=recommendations.sim_pearson)
print('when algorithm of similarity is sim_pearson:')
def product(product_id):
    """Render the details page for one product.

    Recomputes the product's average rating, collects usernames, ratings and
    comments for the product, and builds a list of up to 4 similar products
    by transforming all stored ratings and calling ``topMatches``.

    Changes from the original:
    * the review query binds ``product_id`` as a parameter instead of
      concatenating it into the SQL string;
    * the final comprehension no longer uses ``product`` as its loop
      variable, which under Python 2 overwrote the product passed to the
      template.
    """
    product_ratings = Rating.query.filter_by(product_id=product_id).all()
    product = Product.query.filter_by(id=product_id).first()
    category = Category.query.filter_by(id=product.category_id).first()

    # Average rating over the stored rating_count, rounded to 2 decimals.
    rating_sum = 0
    for r in product_ratings:
        rating_sum += r.rating
    if product.rating_count != 0:
        product.rating = rating_sum / product.rating_count
        product.rating = round(product.rating, 2)

    # Usernames, ratings and comments for this product; total counts the rows.
    review_query = text(
        "select User.username, Rating.rating, Rating.comment from Rating "
        "left join User on Rating.user_id = User.id "
        "where Rating.product_id = :product_id"
    ).bindparams(product_id=product_id)
    total = 0
    users = []
    comments = []
    rating = []
    for row in db.engine.execute(review_query):
        rating.append(row[1])
        users.append(row[0])
        comments.append(row[2])
        total += 1

    # {username: {product name: rating}} over every stored rating;
    # the first rating seen for a (user, product) pair is the one kept.
    prefs_by_user = {}
    for entry in Rating.query.all():
        user = User.query.filter_by(id=entry.user_id).first()
        rated = Product.query.filter_by(id=entry.product_id).first()
        per_user = prefs_by_user.setdefault(user.username, {})
        per_user.setdefault(rated.name, entry.rating)

    prefs_by_product = transformPrefs(prefs_by_user)
    if product.name not in prefs_by_product:
        product_list = []
    else:
        matches = topMatches(prefs_by_product, product.name, 4)
        # Element [1] of each match is used as the product name.
        product_list = [
            Product.query.filter_by(name=match[1]).first() for match in matches
        ]

    return render_template("product_details.html",
                           recommended_products=product_list,
                           product=product,
                           category=category,
                           ratings=rating,
                           users=users,
                           comments=comments,
                           total=total)
import recommendations

toby_top3 = recommendations.topMatches(recommendations.critics, 'Toby', n=3)
print(toby_top3)
# Output recorded alongside the call in the original snippet:
# [(0.99124070716192991, 'Lisa Rose'),
#  (0.92447345164190486, 'Mick LaSalle'),
#  (0.89340514744156474, 'Claudia Puig')]
def test_topMatches_result(self):
    """Toby's three best matches equal the reference scores to 5 decimal places.

    ``assertAlmostEqual`` cannot subtract two lists, so the original raised
    TypeError whenever the lists were not exactly equal. The comparison is
    now done pair by pair.
    """
    expected = [
        (0.99124070716192991, 'Lisa Rose'),
        (0.92447345164190486, 'Mick LaSalle'),
        (0.89340514744156474, 'Claudia Puig'),
    ]
    actual = recommendations.topMatches(critics, 'Toby', n=3)

    self.assertEqual(len(actual), len(expected))
    for (got_score, got_name), (want_score, want_name) in zip(actual, expected):
        self.assertEqual(got_name, want_name)
        self.assertAlmostEqual(got_score, want_score, 5)
__author__ = 'ponomarevandrew'

from recommendations import critics
import recommendations

SEPARATOR = "-----------"

print(critics['Lisa Rose']['Lady in the Water'])
critics['Toby']['Snakes on a Plane'] = 4.5
print(critics['Toby'])
print(critics)
print(SEPARATOR)

print(recommendations.getRecommendations(recommendations.critics, 'Toby'))
print(SEPARATOR)

# Flip the preference dict, then look up matches for one film.
movies = recommendations.transformPrefs(recommendations.critics)
print(recommendations.topMatches(movies, 'Superman Returns'))
print(SEPARATOR)

itemsim = recommendations.calculateSimilarItems(recommendations.critics)
print(itemsim)
print(SEPARATOR)

print(recommendations.getRecommendedItems(recommendations.critics, itemsim, 'Toby'))
print(SEPARATOR)

# MovieLens data set.
prefs = recommendations.loadMovieLens()
print(prefs['1'])
print(SEPARATOR)

print(recommendations.getRecommendations(prefs, '42')[31:60])
print(SEPARATOR)

##itemsim = recommendations.calculateSimilarItems(prefs, n =50)
##print itemsim
print(SEPARATOR)

##print recommendations.getRecommendedItems(prefs,itemsim, '87')[0:30]
print(SEPARATOR)
import recommendations

# Pairwise similarity of 'ming' against three other people, first with
# sim_distance and then with sim_pearson.
print("distance")
for other in ('lin', 'michael', 'mick'):
    print(recommendations.sim_distance(recommendations.critics, 'ming', other))

print("pearson")
for other in ('lin', 'michael', 'mick'):
    print(recommendations.sim_pearson(recommendations.critics, 'ming', other))

print("top match")
print(recommendations.topMatches(recommendations.critics, 'ming', n = 3))

print("recommendation")
print(recommendations.getRecommendations(recommendations.critics, 'ming'))

# Same queries on the transformed (item-keyed) preferences.
print("recommend items")
items = recommendations.transformPrefs(recommendations.critics)
print(recommendations.topMatches(items, 'lady in the water', n =3))
print(recommendations.sim_pearson(items, 'lady in the water', 'snake on a plane'))

print("item similarity use distance")
print(recommendations.calculateSimilarItems(recommendations.critics))

print('item similarity use pearson')
print(recommendations.calculateSimilarItems(
    recommendations.critics, similarity = recommendations.sim_pearson))

print('recommendation base on items')
itemsim = recommendations.calculateSimilarItems(recommendations.critics)
def fillItems(user_dict):
    """Fill ``user_dict`` in place with 1.0/0.0 ratings for every posted URL.

    For each user the posts are fetched with up to three attempts. Every
    posted URL is rated 1.0 for that user; afterwards each user gets 0.0 for
    every URL seen for anyone else.

    Fix: if all three attempts failed, the original fell through to the
    ``for post in posts`` loop with ``posts`` either undefined (NameError) or
    still holding the previous user's posts. Such users are now skipped.
    The bare ``except`` is narrowed so Ctrl-C is no longer swallowed.
    """
    all_items = {}
    # Find links posted by all users
    for user in user_dict:
        for _attempt in range(3):
            try:
                posts = get_userposts(user)
                break
            except Exception:
                print("Failed user " + user + ", retrying")
                time.sleep(4)
        else:
            # Every attempt failed: leave this user's dict untouched here.
            continue
        for post in posts:
            url = post['url']
            user_dict[user][url] = 1.0
            all_items[url] = 1

    # Fill in missing items with 0
    for ratings in user_dict.values():
        for item in all_items:
            if item not in ratings:
                ratings[item] = 0.0


#delusers = initializeUserDict('programming')
#fillItems(delusers)
#print delusers
user = delusers.keys()[random.randint(0, len(delusers) - 1)]
print(topMatches(delusers, user))
# encoding: utf-8
'''
Created on 2015-07-19

@author: HCY
'''
from recommendations import critics, sim_distance, sim_pearson
import recommendations

PERSON = 'Hcy'

#print(critics['Lisa Rose']['Lady in the Water'])
print(critics[PERSON])

# Similarity of one fixed pair under both measures.
print(recommendations.sim_distance(critics, 'Lisa Rose', 'Gene Seymour'))
print(recommendations.sim_pearson(critics, 'Lisa Rose', 'Gene Seymour'))

# Three best matches under each measure, then the recommendations.
for measure in (sim_pearson, sim_distance):
    print(recommendations.topMatches(critics, PERSON, n=3, similarity=measure))
print(recommendations.getRecommendations(critics, PERSON))
#!/usr/bin/env python
# coding=utf-8

# filename : recommendations_test.py
# author : Chase
# update : 2016/09/10

import recommendations

# Distances between two pairs of people.
leo_cathy = recommendations.sim_distance(recommendations.critics, 'Leo', 'Cathy')
print("distance between Leo and Cathy is:", leo_cathy)

susie_cathy = recommendations.sim_distance(recommendations.critics, 'Susie', 'Cathy')
print("distance between Susie and Cathy is:", susie_cathy)

# Three best matches for Antonio.
antonio_matches = recommendations.topMatches(recommendations.critics, 'Antonio', n=3)
print("Antonio TopMatches:", antonio_matches)

# Recommendations for Leo.
leo_recs = recommendations.getRecommendations(recommendations.critics, 'Leo')
print("Leo's recommendations:", leo_recs)
import matplotlib.pyplot as plt
import numpy as np
import scipy.spatial.distance as sci
import recommendations as rec

# Results get descriptive names; the original bound them to ``dict`` (hiding
# the builtin) and to ``recommendations``.
euclid_matches = rec.topMatches(rec.critics, person='Toby', similarity='sim_euclid')
print("euklid")
print(euclid_matches)
print("_"*80)

pearson_matches = rec.topMatches(rec.critics, person='Toby', similarity='sim_pearson')
print("pearson")
print(pearson_matches)

toby_recommendations = rec.getRecommendations(rec.critics, 'Toby', 'sim_pearson')
print("Rec")
print(toby_recommendations)

print("UCF")
print(rec.critics)

transCritics = rec.transformCritics(rec.critics, 'sim_euclid')
print("ICF")
print(transCritics)

#print rec.topMatches(transCritics, 'Lady in the Water', 'sim_pearson')
print(rec.calculateSimilarItems(transCritics, 'sim_pearson'))
# Commands used to demonstrate / use functions created in Collective Intelligence
import recommendations
from recommendations import critics

# pg 9: read one rating, overwrite one of Toby's, show Toby's ratings
critics['Lisa Rose']['Lady in the Water']
critics['Toby']['Snakes on a Plane'] = 4.5
critics['Toby']

# pg 11
recommendations.sim_distance(recommendations.critics, 'Lisa Rose', 'Gene Seymour')

# pg 14
recommendations.sim_pearson(recommendations.critics, 'Lisa Rose', 'Gene Seymour')

# pg 15
recommendations.topMatches(recommendations.critics, 'Toby', n = 3)

# pg 17
recommendations.getRecommendations(recommendations.critics, 'Toby')
recommendations.getRecommendations(
    recommendations.critics, 'Toby',
    similarity = recommendations.sim_distance)

# Same queries on the transformed preference dict.
movies = recommendations.transformPrefs(recommendations.critics)
recommendations.topMatches(movies, 'Superman Returns')
recommendations.getRecommendations(movies, 'Just My Luck')
def test_TopMatches(self):
    """Toby's single best match is Lisa Rose with the reference score.

    The score is compared with assertAlmostEqual rather than exact float
    equality, so harmless floating-point differences do not fail the test.
    """
    score, name = recommendations.topMatches(recommendations.critics, 'Toby', n=1)[0]
    self.assertEqual(name, 'Lisa Rose')
    self.assertAlmostEqual(score, 0.99124070716192991)
import recommendations as rec

RULE = "--------------------------------------------------------"

pref = rec.loadMovieLens()
# NOTE(review): this topMatches variant takes a leading 1/0 flag; the labels
# below suggest 1 = most correlated, 0 = least -- confirm in the module.
correlated = rec.topMatches(1, pref, '73')
noncorrelated = rec.topMatches(0, pref, '73')

print("Five Most Correlated Users: " + '73')
print(RULE)
for entry in correlated:
    print(entry[1])
print(RULE)

print("Five Least Correlated Users: " + '73')
print(RULE)
for entry in noncorrelated:
    print(entry[1])
if key == '': continue user_dict[key] = eval(r.hget(user_dict_key,key)) return user_dict if __name__ == '__main__': #user_dict = initializeUserDict('programming') #fillItems(user_dict) #save_user_dict(user_dict) user_dict = get_user_dict('user_dict') # Find users most like a given user user = user_dict.keys()[random.randint(0, len(user_dict)-1)] top_matches = recommendations.topMatches(user_dict, user) print "Top matches for", user, ":" print "{0:6} {1}".format( "Score", "Name") for match in top_matches: print "{0:.4f} {1:6}".format(match[0], match[1]) print "\nRecommended URLs for", user, ":" recs = recommendations.getRecommendations(user_dict, user)[0:10] print "{0:6} {1}".format( "Score", "URL") for item in recs: print "{0:.4f} {1}".format(item[0], item[1]) print "placeholder for breakpoint"
user_url_tuple = load(cache_file) cache_file.close() else: user_url_tuple = sorted([( user, post["url"], ) for user in user_dict for post in get_userpost_details(user)]) if cache_dict: print "Saving to cache file" cache_file = open(items_cache_file, "wb") dump(user_url_tuple, cache_file) cache_file.close() #Very Imperative grouped_by_values = groupby(user_url_tuple, lambda (user, _): user) distinct_url_sequence = map(lambda (_, url): (url, 0), user_url_tuple) for user in user_dict: user_dict[user] = dict(distinct_url_sequence) for user, grouped_values in grouped_by_values: for _, url in grouped_values: user_dict[user][url] = 1 user_dict = initializeUserDict('technology', count=10) fillItems(user_dict) user = user_dict.keys()[random.randint(0, len(user_dict) - 1)] print "Top matches for ", user, " are ", topMatches(user_dict, user) print "Recommendations for user ", user, " are ", getRecommendations( user_dict, user)
ratings[item] = 0.0 #全局测试代码 if 0: pl = get_popular('programming') url = pl[0]['url'] for item in pl: url = item['url'] print url for item in pl: url = item['url'] print url print get_urlposts(url) print get_urlposts('http://www.holub.com/goodies/uml/') #print url # print get_urlposts(url) #全局测试代码二 if 0: delusers = initializeUserDict('programming') #可以向空词典内加入任意相加的用户 delusers['tsegaran'] = {} fillItems(delusers) #随机选择一位用户 import random user = delusers.keys()[random.randint(0, len(delusers) - 1)] print user import recommendations print recommendations.topMatches(delusers, user)
# instantiate network
network = pl.get_lastfm_network()

# get Band
band = network.get_artist("J.B.O")
topfans = band.get_top_fans(10)
group = [a.item for a in topfans]
#print band
#print topfans
#print group

userDict = recom.createLastfmUserDict(group)
selectedUser = group[1].get_name()

# NOTE(review): the original text between the two string literals here was
# mangled ("..."******"...") and is not valid Python; reconstructed as
# "print the selected user, then a separator" -- confirm against history.
print("Selected User: %s" % selectedUser)
print("====================================")

topMatches = recom.topMatches(userDict, group[1].get_name(), recom.sim_euclid)
print("Top Matches: ")
pp.pprint(topMatches)
print("====================================")

recommendations = recom.getRecommendations(userDict, group[1].get_name(), recom.sim_euclid)
print("Recommendations: ")
pp.pprint(recommendations)
print("====================================")
#print result[0][1].key()
# instantiate network
network = pl.get_lastfm_network()

# get Band
band = network.get_artist("J.B.O")
topfans = band.get_top_fans(10)
group = [a.item for a in topfans]
#print band
#print topfans
#print group

userDict = recom.createLastfmUserDict(group)
selectedUser = group[1].get_name()

# NOTE(review): the source text between the two string literals on this line
# was mangled ("..."******"...") and does not parse; reconstructed as two
# prints (selected user, then separator) -- confirm against history.
print("Selected User: %s" % selectedUser)
print("====================================")

topMatches = recom.topMatches(userDict, group[1].get_name(), recom.sim_euclid)
print("Top Matches: ")
pp.pprint(topMatches)
print("====================================")

recommendations = recom.getRecommendations(userDict, group[1].get_name(), recom.sim_euclid)
print("Recommendations: ")
pp.pprint(recommendations)
print("====================================")
#print result[0][1].key()
import recommendations

print('-------')
# Flip the preference dict, then look up matches for one film.
movies = recommendations.transformPrefs(recommendations.critics)
# Title corrected from 'Supermen Returns'.
# NOTE(review): assumes the critics data uses the title 'Superman Returns' -- confirm.
result = recommendations.topMatches(movies, 'Superman Returns')
print(result)

print('-------')
result = recommendations.getRecommendations(movies, 'Just My Luck')
print(result)
import recommendations choice = raw_input('enter your search type\n1.By user\n2.By Book\nChoice:\t') if choice == '1': user = raw_input('enter the user ID:\t') print '\n' print 'Books till now read by the user are:\n' print critics[user], '\n' dischoice = raw_input( 'enter your choice\n1.euclidian distance\n2.pearson corelation\nchoice:\t' ) if dischoice == '1': print 'top matched user according to the euclidian distance\n' print recommendations.topMatches(critics, user, 3, recommendations.sim_distance), '\n' print 'Recommended books according to the euclidian distance\n' print recommendations.getRecommendations( critics, user, similarity=recommendations.sim_distance)[0:3], '\n' else: if dischoice == '2': print 'top matched user according to the pearson corelation\n' print recommendations.topMatches(critics, user, 3), '\n' print 'Recommended books according to the pearson corelaton\n' print recommendations.getRecommendations(critics, user)[0:3], '\n' else: print 'invalid choice... try again' else: if choice == '2': critic = recommendations.transformPrefs(critics) book = raw_input('enter the book name:\t')
continue current_cor = rec.sim_pearson(prefs, substitute, id) correlated_list.append([current_cor, id]) sorted_by_correlation = sorted( correlated_list) #[-5:] # sorted list of users correlated to substitute least_correlated_users = sorted_by_correlation[::-1][ -5:] # top 5 least correlated users: [::-1] reverses the list most_correlated_users = sorted_by_correlation[ -5:] # top 5 most correlated users print "most correlated users and their correlation value", most_correlated_users # part 2 print "least correlated users and their correlation value", least_correlated_users # part 2 top_matches = [] bot_matches = [] top_matches = rec.topMatches(prefs, substitute, 5) bot_matches = rec.botMatches(prefs, substitute, 5) print top_matches print bot_matches ''' incomplete work on extra credit problem 5 movie = recordtype.recordtype("Movie", "id title") # named tuple containing movie id and title movies = [] ratingList= [] movielens_titlelist = [] with open("movielens/u.item", "r") as movielist: for line in movielist: id, title, release, release2, imdb_url, unknown, action, adventure, animation, childrens, comedy, crime, documentary, drama, fantasy, film_noir, horror, musical, mystery, romance, sci_fi, thriller, war, western = line.split("|") id_int=int(id) movies.append(movie(id=id, title=title))
from recommendations import (
    critics,
    sim_distance,
    sim_pearson,
    topMatches,
    pearsonTopMatches,
    distanceTopMatches,
    getRecommendations,
)
from recommendations2 import moviesgrade
from recommendations2 import transformPrefs

#print(distanceTopMatches(critics, 'Toby', n=3))

# transformPrefs() swaps the keys and values of the map object
movies = transformPrefs(critics)

# Movies similar to Superman Returns
# NOTE(review): the original comment said "top 3" but 2 is passed -- confirm.
similar_movies = topMatches(movies, 'Superman Returns', 2)
print(similar_movies)

# Critic recommendations
print(getRecommendations(movies, 'Just My Luck'))
def testNLargetThanCount(self):
    """Asking for more matches than exist returns everyone except the person."""
    population = len(self.data)
    m = recommendations.topMatches(self.data, 'Kerstin', n=2*population)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(population - 1, len(m))
items_cache_file = "items_cache.p" if cache_dict and exists(items_cache_file): print "Reading the items from cache file", items_cache_file cache_file = open(items_cache_file, "rb") user_url_tuple = load(cache_file) cache_file.close() else: user_url_tuple = sorted([(user, post["url"], ) for user in user_dict for post in get_userpost_details(user)]) if cache_dict: print "Saving to cache file" cache_file = open(items_cache_file, "wb") dump(user_url_tuple, cache_file) cache_file.close() #Very Imperative grouped_by_values = groupby(user_url_tuple, lambda (user, _): user) distinct_url_sequence = map(lambda (_, url) : (url, 0), user_url_tuple) for user in user_dict: user_dict[user] = dict(distinct_url_sequence) for user, grouped_values in grouped_by_values: for _, url in grouped_values: user_dict[user][url] = 1 user_dict = initializeUserDict('technology', count=10) fillItems(user_dict) user = user_dict.keys()[random.randint(0, len(user_dict) - 1)] print "Top matches for ", user, " are ", topMatches(user_dict, user) print "Recommendations for user ", user, " are ", getRecommendations(user_dict, user)
correlation = sys.argv[3] ratingsdict = getRatingsFromFile(ratingsfile) raters = ratingsdict.keys() if correlation == 'agreed': reversesort = True else: reversesort = False comparedRaters = {} for i in range(0, len(raters)): # O(n) best = recommendations.topMatches(ratingsdict, raters[i], n=len(raters)) best.sort(reverse=reversesort) if best[0][1] == raters[i]: best.pop() # remove dupes if (best[0][1], raters[i]) not in comparedRaters: comparedRaters[(raters[i], best[0][1])] = best[0][0] for item in sorted( comparedRaters, key=comparedRaters.get, reverse=reversesort ): print str(item[0]) + '\t' + str(item[1]) + '\t' + \ str(comparedRaters[item])
def find_correlated_users_subsitute(id, lens_data):
    """Print the topMatches result for ``id`` within ``lens_data``."""
    print('best matches: ')
    best = recommendations.topMatches(lens_data, id)
    print(best, "\n")
#!/usr/local/bin/python import sys import recommendations if __name__ == '__main__': fav_moviename = "Die Hard (1988)" hate_moviename = "Kazaam (1996)" prefs = recommendations.loadMovieLens() itemPrefs = recommendations.transformPrefs(prefs) fav_results = recommendations.topMatches(itemPrefs, fav_moviename, 2000) hate_results = recommendations.topMatches(itemPrefs, hate_moviename, 2000) print "Most 5 correlated for my top favourite movie" for i in fav_results[0:5]: print i[0], i[1] print '\n' print "Least 5 correlated for my top favourite movie" fav_results.reverse() for i in fav_results[0:5]: print i[0], i[1] print '\n' print "Most 5 correlated for my least favourite movie" for i in hate_results[0:5]: print i[0], i[1] print '\n' print "Least 5 correlated for my least favourite movie" hate_results.reverse() for i in hate_results[0:5]: print i[0], i[1]
import recommendations as recommend

#print recommend.critics['Toby']

# print recommend.sim_distance(recommend.critics, 'Lisa Rose' ,'Gene Seymour')
#
# print recommend.sim_distance(recommend.critics, 'Lisa Rose' ,'Michael Phillips')
#
# print recommend.sim_distance(recommend.critics, 'Lisa Rose' ,'Claudia Puig')
#
# print recommend.sim_distance(recommend.critics, 'Lisa Rose' ,'Mick LaSalle')
#
# print recommend.sim_distance(recommend.critics, 'Lisa Rose' ,'Jack Matthews')
#
# print recommend.sim_distance(recommend.critics, 'Lisa Rose' ,'Toby')

#print recommend.sim_pearson(recommend.critics, 'Lisa Rose' ,'Gene Seymour')

# Toby's three best matches: default similarity first, then sim_distance.
default_top3 = recommend.topMatches(recommend.critics, 'Toby', n=3)
print(default_top3)

distance_top3 = recommend.topMatches(
    recommend.critics, 'Toby', n=3, similarity=recommend.sim_distance)
print(distance_top3)
#!/usr/local/bin/python import sys import recommendations if __name__ == '__main__': fav_moviename = "Die Hard (1988)" hate_moviename = "Kazaam (1996)" prefs = recommendations.loadMovieLens() itemPrefs = recommendations.transformPrefs(prefs) fav_results = recommendations.topMatches(itemPrefs,fav_moviename,2000) hate_results = recommendations.topMatches(itemPrefs,hate_moviename,2000) print "Most 5 correlated for my top favourite movie" for i in fav_results[0:5]: print i[0],i[1] print '\n' print "Least 5 correlated for my top favourite movie" fav_results.reverse() for i in fav_results[0:5]: print i[0],i[1] print '\n' print "Most 5 correlated for my least favourite movie" for i in hate_results[0:5]: print i[0],i[1] print '\n' print "Least 5 correlated for my least favourite movie" hate_results.reverse() for i in hate_results[0:5]: print i[0],i[1]
# critics, sim_distance and sim_pearson are used below but were never
# imported, so the script stopped with NameError at the first sim_distance call.
# NOTE(review): assumes recommendations.py exports these three names -- confirm.
from recommendations import critics, sim_distance, sim_pearson
from recommendations import topMatches, getRecommendations, transformPrefs
from recommendations import calculateSimilarItems, getRecommendedItems

# print critics['Lisa Rose']['Lady in the Water']
#
# critics['Toby']['Snakes on a Plane'] = 4.5
# print critics['Toby']

print('Euclidean Distance Score of Lisa Rose and Gene Seymour is ')
print(sim_distance(critics, 'Lisa Rose', 'Gene Seymour'))

print('Pearson Correlation Score of Lisa Rose and Gene Seymour is ')
print(sim_pearson(critics, 'Lisa Rose', 'Gene Seymour'))

print('TopMatches 3 for Toby is ')
print(topMatches(critics, 'Toby', n=3))

# User-Based CF
print('通过按人群与 Toby 相似度,加权重新评分,为影片排名获得推荐: ')
print(getRecommendations(critics, 'Toby'))

print('通过查看哪些人喜欢 Superman Returns,以及这些人喜欢哪些其他物品来确定相似度:')
movies = transformPrefs(critics)
print(topMatches(movies, 'Superman Returns'))

print('可能最喜欢 Just My Luck 的人群列表(对调人和物不一定能获得有用信息):')
print(getRecommendations(movies, 'Just My Luck'))

# Item-Based CF
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2017/2/26 16:19
# @Author : Shuqi.qin
# @Site :
# @File : test.py
# @Software: PyCharm

import recommendations as rc

critics = rc.critics

# Exercise the similarity, matching and recommendation helpers once each.
res1 = rc.sim_distance(critics, "Lisa Rose", "Gene Seymour")
res2 = rc.sim_pearson(critics, "Lisa Rose", "Gene Seymour")
res3 = rc.topMatches(critics, "Toby", n=3)
res4 = rc.getRecommendationsBaseOnPersonSimilarity(critics, "Toby")

print('over')

import pydelicious

popular_posts = pydelicious.get_popular(tag='programming')
print(popular_posts)
def test_TopMatches(self):
    """The best match for Toby is Lisa Rose, with the reference score.

    The score is checked with assertAlmostEqual instead of exact float
    equality, so tiny floating-point differences do not fail the test.
    """
    best = recommendations.topMatches(recommendations.critics, 'Toby', n=1)[0]
    best_score, best_name = best
    self.assertEqual(best_name, 'Lisa Rose')
    self.assertAlmostEqual(best_score, 0.99124070716192991)
all_items={} # Find links posted by all users for user in user_dict: for i in range(3): try: posts=get_userposts(user) break except: print "Failed user "+user+", retrying" time.sleep(4) for post in posts: url=post['url'] user_dict[user][url]=1.0 all_items[url]=1 # Fill in missing items with 0 for ratings in user_dict.values(): for item in all_items: if item not in ratings: ratings[item]=0.0 #delusers = initializeUserDict('programming') #fillItems(delusers) #print delusers user = delusers.keys()[random.randint(0, len(delusers)-1)] print topMatches(delusers, user)
import recommendations

print('-------')
# Four best matches for Toby.
toby_matches = recommendations.topMatches(recommendations.critics, "Toby", n=4)
result = toby_matches
print(result)
from math import sqrt
import recommendations

critics = recommendations.critics

# Similarity between two users, sim_distance
print(recommendations.sim_distance(critics, 'Lisa Rose', 'Gene Seymour'))

# Similarity between two users, sim_pearson
print(recommendations.sim_pearson(critics, 'Lisa Rose', 'Gene Seymour'))

# Find similar users
toby_matches = recommendations.topMatches(critics, 'Toby', n=3)
print(toby_matches)

# Movie recommendations with the default similarity
print(recommendations.getRecommendations(critics, 'Toby'))

# Movie recommendations with sim_distance
toby_by_distance = recommendations.getRecommendations(
    critics, 'Toby', similarity=recommendations.sim_distance)
print(toby_by_distance)

# Transpose the matrix to movies vs user
movies = recommendations.transformPrefs(critics)

# Print similar movies
import recommendations as rec

# 2.4.1.1
# transformation of the critics matrix to {"movie":{"Person1":"similarity", ...}}
transCritics = rec.transposeMatrix(rec.critics)

# 2.4.1.2
# Both similarity tables are read by the prints in 2.4.1.3, so the euclidean
# one has to be a live assignment (a commented-out version would make the
# first print below fail with NameError).
similar_items_euclidean = rec.calculateSimilarItems(
    transCritics, rec.sim_euclid_normed)
similar_items_pearson = rec.calculateSimilarItems(
    transCritics, rec.sim_pearson)
print(rec.topMatches(transCritics, 'Lady in the Water', rec.sim_euclid))

# 2.4.1.3
print("Recommended Movies (euclidean): " + str(
    rec.getRecommendedItems(rec.critics, 'Toby Segaran',
                            similar_items_euclidean)))
print("Recommended Movies (pearson): " + str(
    rec.getRecommendedItems(rec.critics, 'Toby Segaran',
                            similar_items_pearson)))
#from deliciousrec import *
#delusers=initializeUserDict('programming')
#delusers['tsegaran']={}
#fillIteams(delusers)
import recommendations

# Short alias for the ratings data used by the calls below.
critics = recommendations.critics

#print recommendations.critics['Lisa Rose']['Lady in the Water']
# Raw entries of the two critics compared next.
print(critics['Lisa Rose'])
print(critics['Gene Seymour'])

# The same pair scored by each similarity function.
print(recommendations.sim_distance(critics, 'Lisa Rose', 'Gene Seymour'))
print(recommendations.sim_pearson(critics, 'Lisa Rose', 'Gene Seymour'))

# Matches for Toby: default similarity first, then sim_distance.
print(recommendations.topMatches(critics, 'Toby', n=3))
print(recommendations.topMatches(
    critics, 'Toby', n=3, similarity=recommendations.sim_distance))

# The remaining calls are executed but their results are not displayed.
recommendations.getRecommendations(critics, 'Toby')
recommendations.getRecommendations(
    critics, 'Toby', similarity=recommendations.sim_distance)

movies = recommendations.transformPrefs(critics)
#print movies
recommendations.topMatches(movies, 'Superman Returns')
recommendations.getRecommendations(movies, 'Just My Luck')
# NOTE(review): this excerpt appears to start part-way through a larger
# routine; critics, a, userId, booktitles, white, cyan and the helper
# functions called below are expected to come from the surrounding file.
# The nesting was reconstructed from the statements themselves - confirm
# against the original layout.
toprecs = getRecommendations1(critics, a)[0:5]  # keep at most the first 5 entries
for film in toprecs:
    print(white(booktitles[film[1]]))
print("\n--------------------------------------------------------\n")

print_UBRecommendations(critics, userId)  # PRINT SOME USER BASED RECOMMENDATIONS
scores2 = find_users_favoritebooks(critics, userId)  # FIND USER '60255' FAVORITE BOOKS

# MAKE SOME ITEM BASED RECOMMENDATIONS FROM USERS FAVORITE BOOKS
print(" \n----------------Item based recommendations---------------")

# Keys of scores2 (ratings, per the original author's note), highest first.
topchoices = sorted(scores2.keys(), reverse=True)

count2 = 0
# The transformed preferences do not depend on the book being looked up, so
# they are built once (lazily, inside the try) instead of once per book.
bookdic = None

# Get item based recommendations for up to 5 of the user's favourite books.
for a22 in topchoices:
    if count2 == 5:
        break  # quota reached: nothing left to do
    for b22 in scores2[a22]:
        if count2 == 5:
            break
        try:
            if bookdic is None:
                bookdic = recommendations.transformPrefs(critics)
            werec1 = recommendations.topMatches(bookdic, b22)
            print("%s %s %s" % (
                "\nRecommendations based upon users rating of >",
                cyan(booktitles[b22]),
                "\n"))
            # Convert BookIds to booktitles
            convertId_to_title(werec1, 10)
            count2 += 1
        except Exception:
            # Best effort: report the book and move on. Exception (not a
            # bare except) so Ctrl-C still interrupts the run.
            print("%s %s %s" % ("BOOK ID", b22, "NOT IN DATABASE"))
# NOTE(review): the first part of this excerpt appears to be the tail of a
# function body whose header (and the loop that binds `user`) is not visible
# here; user_dict, all_items and get_userposts come from that context.

# Start from an empty list: if every attempt fails, this user simply
# contributes no links, instead of raising on an unbound name or silently
# reusing posts fetched for a previous user.
posts = []
for attempt in range(3):
    try:
        posts = get_userposts(user)
        break
    except Exception:
        # Exception (not a bare except) so Ctrl-C still stops the run.
        print("Failed user " + user + ",retrying")
        time.sleep(4)
for post in posts:
    url = post['url']
    user_dict[user][url] = 1.0
    all_items[url] = 1

# Fill in missing items with 0
for ratings in user_dict.values():
    for item in all_items:
        if item not in ratings:
            ratings[item] = 0.0

if __name__ == "__main__":
    delusers = initializeUserDict('programming')
    delusers['onerhao'] = {}
    fillItems(delusers)
    print(delusers)

    # Pick one user at random and show matches and recommendations.
    user = random.choice(list(delusers.keys()))
    print(user)
    print(recommendations.topMatches(delusers, user))
    print(recommendations.getRecommendations(delusers, user))
    # url = recommendations.getRecommendations(delusers, user)[0][1]
    # print recommendations.topMatches(
    #     recommendations.transformPrefs(delusers), url)
def print_RecommendedCritics(critics, userId='60255'):
    """Print the topMatches result for ``userId`` and return it.

    Calls ``recommendations.topMatches(critics, userId, n=5)`` and prints
    one line per returned entry, showing element 1 followed by element 0
    (labelled as the score), both wrapped by ``_wrap_with('36')``.

    Returns the list produced by ``topMatches`` unchanged.
    """
    paint = _wrap_with('36')
    print("\n-----Similar Critics using Top Match and sim_pearson-----\n")
    matches = recommendations.topMatches(critics, userId, n=5)
    for entry in matches:
        who = paint(entry[1])
        how_close = paint(entry[0])
        print("%s %s %s" % (who, "with a score of", how_close))
    return matches
# Scratch script: statements pasted from an interactive session. Bare
# expressions such as `distances` display a value in a REPL and do nothing
# when run as a script.

# Score every ordered pair of distinct critics, then order best-first.
distances = [
    (recommendations.sim_distance(recommendations.critics, p1, p2), p1, p2)
    for p1 in critics
    for p2 in critics
    if p1 != p2
]
distances.sort()
distances.reverse()
distances

###########################
reload(recommendations)

recommendations.sim_distance(
    recommendations.critics, 'Lisa Rose', 'Gene Seymour')
recommendations.sim_pearson(
    recommendations.critics, 'Lisa Rose', 'Gene Seymour')
recommendations.topMatches(recommendations.critics, 'Toby', n=3)

######################################################
# del.icio.us:
######################################################
delusers = deliciousrec.initializeUserDict('gratis')
# Add yourself to the dictionary if you use delicious
delusers['lpagliari'] = {}
deliciousrec.fillItems(delusers)
delusers

###########################
# pick random user and find users similar to him/her:
import recommendations
reload(recommendations)
import random
#!/usr/bin/python
from recommendations import critics, sim_distance, sim_pearson, topMatches

if __name__ == '__main__':
    # Show the topMatches result for 'Toby' with n=3.
    closest = topMatches(critics, 'Toby', n=3)
    print(closest)