def run_examples(): import random user = USER_DICT.keys()[random.randint(0, len(USER_DICT) - 1)] print user top = recommendations.top_matches(USER_DICT, user) print top recs = recommendations.get_recommendations(USER_DICT, user)[:10] print recs url = recs[0][1] more_top = recommendations.top_matches(recommendations.transform_prefs(USER_DICT), url) print more_top
def run_examples(): import random user = USER_DICT.keys()[random.randint(0, len(USER_DICT) - 1)] print user top = recommendations.top_matches(USER_DICT, user) print top recs = recommendations.get_recommendations(USER_DICT, user)[:10] print recs url = recs[0][1] more_top = recommendations.top_matches( recommendations.transform_prefs(USER_DICT), url) print more_top
def __main__(): tags_dict = build_tag_dict(build_tag_list("programming")) print "Similar Tags" recs = recommendations.top_matches(tags_dict, "programming", similarity=ex1.sim_tanimato) pprint.pprint(recs) best_rec = recs[0][1] print "Similar Tag, But Not Sharing Tags" pprint.pprint([tag for tag in tags_dict[best_rec] if tags_dict[best_rec][tag] and not tags_dict["programming"][tag]]) # >>> __main__() # Similar Tags # [(0.17391304347826086, u'development'), # (0.15789473684210525, u'course'), # (0.14285714285714285, u'java'), # (0.1, u'coursera'), # (0.09090909090909091, u'gwt')] # Similar Tag, But Not Sharing Tags # [u'http://wheel.readthedocs.org/en/latest/', # u'http://littlegists.blogspot.co.uk/2012/12/building-simple-nancy-app-from-scratch.html', # u'http://msdn.microsoft.com/en-us/library/dn155905.aspx', # u'http://www.amityadav.name/code-analyzers-for-php/', # u'http://www.babycenter.com/0_20-fun-silly-development-boosting-games-to-play-with-your-ba_1479310.bc?scid=momsbaby_20130618:4&pe=MlVEME9CaXwyMDEzMDYxOA..', # u'http://edition.cnn.com/2013/06/19/opinion/technology-change-lives-african-women-jamme/?hpt=hp_t5', # u'https://django-cookie-consent.readthedocs.org/en/latest/', # u'http://www.bbc.co.uk/news/world-22964022', # u'https://docs.google.com/presentation/d/1IRHyU7_crIiCjl0Gvue0WY3eY_eYvFQvSfwQouW9368/present#slide=id.gebc26cd7_8_0']
user_path = "cache/delicious_users" if os.path.exists(user_path ): users = load(open(user_path,"r")) else: print "fetching usernames" users = get_similar_users(u"python",count = 5) users.append("xiaohan2012")#add myself,hehe! dump(users,open(user_path,"w")) print "user count: %d" %len(users) prefs_path = "cache/delicious_prefs" if os.path.exists(prefs_path ): prefs = load(open(prefs_path ,"r")) print len(prefs) else: print "fetching user post info" prefs = get_prefs(users) dump(prefs,open(prefs_path ,"w")) ratio = [sum(prefs[user].values())/len(prefs[user].values()) for user in prefs] ratio.sort(reverse = True) from recommendations import top_matches,get_recommendations import random #user = random.choice((prefs.keys())) print user user = "******" print top_matches(user,prefs) print get_recommendations(user,prefs) print "It is empty, I think we might have encountered data sparsity problem, haha"
return userDict def fillItems(userDict): allItems = {} for user in userDict: for _ in range(3): try: posts = get_userposts(user) break except: print 'Failed user ' + user + ', retrying...' time.sleep(4) for post in posts: url = post['url'] userDict[user][url] = 1.0 allItems[url] = 1 for ratings in userDict.values(): for item in allItems: if item not in ratings: ratings[item] = 0.0 if __name__ == '__main__': delusers = initializeUserDict('programming') fillItems(delusers) user = delusers.keys()[random.randint(0, len(delusers) - 1)] print user print top_matches(delusers, user)[0:5] print get_recommendations(delusers, user)[0:5]
# Euclidean-distance score (compare the sim and nsim implementations).
euclid1 = sim.sim_euclid(critics, 'Lisa Rose', 'Gene Seymour')
euclid2 = nsim.sim_euclid(critics, 'Lisa Rose', 'Gene Seymour')
print("欧几里得距离评价:")
print(euclid1)
print(euclid2)

# Pearson correlation score.
pearson1 = sim.sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')
pearson2 = nsim.sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')
print("皮尔逊相关度评价:")
print(pearson1)
print(pearson2)

# Cosine similarity score.
cosine1 = nsim.sim_cosine(critics, 'Lisa Rose', 'Gene Seymour')
print("余弦相似度评价:")
print(cosine1)

# Jaccard similarity score. This example shows Jaccard's imprecision in
# this setting: both critics rated the same set of movies, so the
# differences in the actual ratings are ignored entirely.
jaccard1 = sim.sim_jaccard(critics, 'Lisa Rose', 'Gene Seymour')
print("jaccard相似度评价:")
print(jaccard1)

# Item-based scoring: flip the prefs matrix, then rank movies similar to
# 'Superman Returns' by Pearson correlation.
movie_pearson = top_matches(transform_profs(critics),
                            'Superman Returns',
                            similarity=sim.sim_pearson,
                            n=5)
print(movie_pearson)
import recommendations

critics = recommendations.critics

# print(critics['Lisa Rose'])
# # print("Similarity using Euclidean Distance: \n")
# # Using Euclidean Distance
# for critic in critics:
#     print("Similarity with " + critic + " : " + str(recommendations.sim_distance("Lisa Rose", critic, critics)))
# # print("Similarity using Pearson Correlation: \n")
# # Using Correlation
# for critic in critics:
#     print("Similarity with " + critic + " : " + str(recommendations.sim_pearson("Lisa Rose", critic, critics)))

# Closest critics to Lisa, then recommended movies for Toby.
print("Top 5 Matches for Lisa")
print(recommendations.top_matches("Lisa Rose", critics))

print("Recommendations for Toby")
print(recommendations.get_recommendations("Toby", critics))

# Flip the matrix to movie-keyed form for item-based similarity.
movies = recommendations.flip_params(critics)
print("If you like superman returns then")
print(recommendations.top_matches("Superman Returns", movies))
def test_top_matches_length(self):
    """top_matches must return exactly n results when n is within range."""
    requested = random.randint(0, len(data.critics) - 1)
    result = recommendations.top_matches(data.critics, pick_a_name(),
                                         n=requested)
    result.should.have.length_of(requested)
import recommendations import deliciousrec delicious_users = deliciousrec.initUserDict('Python', 10) delicious_users = deliciousrec.fillItems(delicious_users) test_user = delicious_users.keys()[0] print recommendations.top_matches(delicious_users, test_user) print recommendations.get_recommendatios(delicious_users, test_user)[0:4]
users = load(open(user_path, "r")) else: print "fetching usernames" users = get_similar_users(u"python", count=5) users.append("xiaohan2012") #add myself,hehe! dump(users, open(user_path, "w")) print "user count: %d" % len(users) prefs_path = "cache/delicious_prefs" if os.path.exists(prefs_path): prefs = load(open(prefs_path, "r")) print len(prefs) else: print "fetching user post info" prefs = get_prefs(users) dump(prefs, open(prefs_path, "w")) ratio = [ sum(prefs[user].values()) / len(prefs[user].values()) for user in prefs ] ratio.sort(reverse=True) from recommendations import top_matches, get_recommendations import random #user = random.choice((prefs.keys())) print user user = "******" print top_matches(user, prefs) print get_recommendations(user, prefs) print "It is empty, I think we might have encountered data sparsity problem, haha"
# Library imports import recommendations, deliciousrec # Initialize delicious users delusers=deliciousrec.initialize_user_dict('programming') # Add myself to the dataset delusers['rogerfernandezg']={} # Fills delicious users with data from delicious deliciousrec.fill_items(delusers) # Show recommendations for specific user user=delusers.keys()[1] print recommendations.top_matches(delusers,user)[0:10] url=recommendations.get_recommendations(delusers,user)[0][1] print recommendations.top_matches(recommendations.transform_prefs(delusers),url)