def run_examples():
	import random
	user = USER_DICT.keys()[random.randint(0, len(USER_DICT) - 1)]
	print user
	top = recommendations.top_matches(USER_DICT, user)
	print top
	recs = recommendations.get_recommendations(USER_DICT, user)[:10]
	print recs
	url = recs[0][1]
	more_top = recommendations.top_matches(recommendations.transform_prefs(USER_DICT), url)
	print more_top
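# Note: the snippet above assumes USER_DICT follows the nested-dict "preferences"
# layout expected by recommendations.top_matches / get_recommendations, i.e.
# {user: {item: rating}}. A minimal hand-built stand-in (users and URLs are
# hypothetical, for illustration only):
example_user_dict = {
    'alice': {'http://example.com/a': 1.0, 'http://example.com/b': 0.0},
    'bob':   {'http://example.com/a': 1.0, 'http://example.com/b': 1.0},
    'carol': {'http://example.com/a': 0.0, 'http://example.com/b': 1.0},
}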
Example #3
import pprint
import recommendations
# Note: ex1 (providing sim_tanimato) and the build_tag_* helpers are assumed
# to be defined elsewhere in this project.


def __main__():
	tags_dict = build_tag_dict(build_tag_list("programming"))
	print "Similar Tags"
	recs = recommendations.top_matches(tags_dict, "programming", similarity=ex1.sim_tanimato)
	pprint.pprint(recs)
	best_rec = recs[0][1]
	print "Similar Tag, But Not Sharing Tags"
	pprint.pprint([tag for tag in tags_dict[best_rec] if tags_dict[best_rec][tag] and not tags_dict["programming"][tag]])

# >>> __main__()
# Similar Tags
# [(0.17391304347826086, u'development'),
#  (0.15789473684210525, u'course'),
#  (0.14285714285714285, u'java'),
#  (0.1, u'coursera'),
#  (0.09090909090909091, u'gwt')]
# Similar Tag, But Not Sharing Tags
# [u'http://wheel.readthedocs.org/en/latest/',
#  u'http://littlegists.blogspot.co.uk/2012/12/building-simple-nancy-app-from-scratch.html',
#  u'http://msdn.microsoft.com/en-us/library/dn155905.aspx',
#  u'http://www.amityadav.name/code-analyzers-for-php/',
#  u'http://www.babycenter.com/0_20-fun-silly-development-boosting-games-to-play-with-your-ba_1479310.bc?scid=momsbaby_20130618:4&pe=MlVEME9CaXwyMDEzMDYxOA..',
#  u'http://edition.cnn.com/2013/06/19/opinion/technology-change-lives-african-women-jamme/?hpt=hp_t5',
#  u'https://django-cookie-consent.readthedocs.org/en/latest/',
#  u'http://www.bbc.co.uk/news/world-22964022',
#  u'https://docs.google.com/presentation/d/1IRHyU7_crIiCjl0Gvue0WY3eY_eYvFQvSfwQouW9368/present#slide=id.gebc26cd7_8_0']
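# ex1.sim_tanimato is not shown in this example; a minimal sketch of a
# Tanimoto (set-overlap) similarity over the same {key: {item: flag}} layout
# might look like the following (an assumption, not the original code):
def sim_tanimoto_sketch(prefs, k1, k2):
    a = set(item for item, v in prefs[k1].items() if v)
    b = set(item for item, v in prefs[k2].items() if v)
    shared = len(a & b)
    if not a and not b:
        return 0.0
    # intersection / (|A| + |B| - intersection)
    return float(shared) / (len(a) + len(b) - shared)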
    user_path = "cache/delicious_users"
    if os.path.exists(user_path):
        users = load(open(user_path, "r"))
    else:
        print "fetching usernames"
        users = get_similar_users(u"python", count=5)
        users.append("xiaohan2012")  # add myself, hehe!
        dump(users, open(user_path, "w"))

    print "user count: %d" % len(users)

    prefs_path = "cache/delicious_prefs"
    if os.path.exists(prefs_path):
        prefs = load(open(prefs_path, "r"))
        print len(prefs)
    else:
        print "fetching user post info"
        prefs = get_prefs(users)
        dump(prefs, open(prefs_path, "w"))
    ratio = [sum(prefs[user].values()) / len(prefs[user].values()) for user in prefs]
    ratio.sort(reverse=True)
    
    from recommendations import top_matches, get_recommendations
    import random
    #user = random.choice(prefs.keys())
    user = "******"
    print user
    print top_matches(prefs, user)
    print get_recommendations(prefs, user)
    print "It is empty, I think we might have encountered data sparsity problem, haha"
    # (tail of an initializeUserDict(tag) helper; the rest of its body is not
    # shown in this fragment)
    return userDict


def fillItems(userDict):
    allItems = {}
    for user in userDict:
        # retry a few times in case the delicious request fails
        # (posts stays empty if every attempt fails)
        posts = []
        for _ in range(3):
            try:
                posts = get_userposts(user)
                break
            except:
                print 'Failed user ' + user + ', retrying...'
                time.sleep(4)
        # mark every URL this user bookmarked with a rating of 1.0
        for post in posts:
            url = post['url']
            userDict[user][url] = 1.0
            allItems[url] = 1
    # give every user a 0.0 rating for URLs they did not bookmark
    for ratings in userDict.values():
        for item in allItems:
            if item not in ratings:
                ratings[item] = 0.0


if __name__ == '__main__':
    delusers = initializeUserDict('programming')
    fillItems(delusers)
    user = delusers.keys()[random.randint(0, len(delusers) - 1)]
    print user
    print top_matches(delusers, user)[0:5]
    print get_recommendations(delusers, user)[0:5]
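# What fillItems does, shown on a toy dict (illustration only): every URL that
# any user bookmarked becomes a key for every user, 1.0 if that user saved it
# and 0.0 otherwise.
toy = {'u1': {'url_a': 1.0}, 'u2': {'url_b': 1.0}}
all_urls = set()
for ratings in toy.values():
    all_urls.update(ratings)
for ratings in toy.values():
    for url in all_urls:
        ratings.setdefault(url, 0.0)
# toy == {'u1': {'url_a': 1.0, 'url_b': 0.0}, 'u2': {'url_a': 0.0, 'url_b': 1.0}}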
Example #6
# Euclidean distance score
euclid1 = sim.sim_euclid(critics, 'Lisa Rose', 'Gene Seymour')
euclid2 = nsim.sim_euclid(critics, 'Lisa Rose', 'Gene Seymour')
print("欧几里得距离评价:")
print(euclid1)
print(euclid2)

# Pearson correlation score
pearson1 = sim.sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')
pearson2 = nsim.sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')
print("皮尔逊相关度评价:")
print(pearson1)
print(pearson2)

# Cosine similarity score
cosine1 = nsim.sim_cosine(critics, 'Lisa Rose', 'Gene Seymour')
print("余弦相似度评价:")
print(cosine1)

# Jaccard similarity score: this case shows why Jaccard is too coarse here.
# Both critics rated exactly the same set of movies, so the differences in
# their actual ratings are ignored (a sketch follows this example).
jaccard1 = sim.sim_jaccard(critics, 'Lisa Rose', 'Gene Seymour')
print("jaccard相似度评价:")
print(jaccard1)

# Item-based scoring: movies similar to 'Superman Returns'
movie_pearson = top_matches(transform_prefs(critics),
                            'Superman Returns',
                            similarity=sim.sim_pearson,
                            n=5)
print(movie_pearson)
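# To make the Jaccard caveat above concrete: a set-based Jaccard similarity
# only compares *which* movies two critics rated, never *how* they rated them,
# so critics with identical movie sets but opposite scores still come out at 1.0.
# (A sketch of the idea; sim.sim_jaccard itself is not shown here.)
def jaccard_on_keys(prefs, p1, p2):
    a, b = set(prefs[p1]), set(prefs[p2])
    if not (a | b):
        return 0.0
    return float(len(a & b)) / len(a | b)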
Example #7
File: ml_1.py  Project: kushagra12/ml-stuff
import recommendations

critics = recommendations.critics

# print(critics['Lisa Rose'])
#
# print("Similarity using Euclidean Distance: \n")
# # Using Euclidean Distance
# for critic in critics:
#     print("Similarity with " + critic + " : " + str(recommendations.sim_distance("Lisa Rose", critic, critics)))
#
# print("Similarity using Pearson Correlation: \n")
# # Using Correlation
# for critic in critics:
#     print("Similarity with " + critic + " : " + str(recommendations.sim_pearson("Lisa Rose", critic, critics)))

print("Top 5 Matches for Lisa")
print(recommendations.top_matches("Lisa Rose", critics))

print("Recommendations for Toby")
print(recommendations.get_recommendations("Toby", critics))

movies = recommendations.flip_params(critics)

print("If you like superman returns then")
print(recommendations.top_matches("Superman Returns", movies))
Example #9
import recommendations
import deliciousrec

delicious_users = deliciousrec.initUserDict('Python', 10)
delicious_users = deliciousrec.fillItems(delicious_users)

test_user = delicious_users.keys()[0]

print recommendations.top_matches(delicious_users, test_user)
print recommendations.get_recommendations(delicious_users, test_user)[0:4]
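# get_recommendations, as used above, builds a similarity-weighted average of
# other users' ratings for items the target user has not rated yet. Roughly
# (a sketch of the standard user-based approach, not this project's exact code):
def get_recommendations_sketch(prefs, person, similarity):
    totals, sim_sums = {}, {}
    for other in prefs:
        if other == person:
            continue
        sim = similarity(prefs, person, other)
        if sim <= 0:
            continue
        for item, rating in prefs[other].items():
            if prefs[person].get(item, 0) == 0:  # unseen by the target user
                totals[item] = totals.get(item, 0) + rating * sim
                sim_sums[item] = sim_sums.get(item, 0) + sim
    rankings = [(total / sim_sums[item], item) for item, total in totals.items()]
    rankings.sort(reverse=True)
    return rankings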
Example #10
    def test_top_matches_length(self):
        limit = random.randint(0, len(data.critics) - 1)
        matches = recommendations.top_matches(data.critics,
                                              pick_a_name(),
                                              n=limit)
        (matches).should.have.length_of(limit)
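# The fluent "(matches).should.have.length_of(limit)" assertion style suggests
# a helper such as the sure library; with plain unittest the equivalent check
# would simply be:
#     self.assertEqual(len(matches), limit)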
        users = load(open(user_path, "r"))
    else:
        print "fetching usernames"
        users = get_similar_users(u"python", count=5)
        users.append("xiaohan2012")  #add myself,hehe!
        dump(users, open(user_path, "w"))

    print "user count: %d" % len(users)

    prefs_path = "cache/delicious_prefs"
    if os.path.exists(prefs_path):
        prefs = load(open(prefs_path, "r"))
        print len(prefs)
    else:
        print "fetching user post info"
        prefs = get_prefs(users)
        dump(prefs, open(prefs_path, "w"))
    ratio = [
        sum(prefs[user].values()) / len(prefs[user].values()) for user in prefs
    ]
    ratio.sort(reverse=True)

    from recommendations import top_matches, get_recommendations
    import random
    #user = random.choice((prefs.keys()))
    print user
    user = "******"
    print top_matches(user, prefs)
    print get_recommendations(user, prefs)
    print "It is empty, I think we might have encountered data sparsity problem, haha"
Example #12
# Library imports
import recommendations, deliciousrec

# Initialize delicious users
delusers = deliciousrec.initialize_user_dict('programming')
# Add myself to the dataset
delusers['rogerfernandezg'] = {}
# Fill delicious users with data from delicious
deliciousrec.fill_items(delusers)
# Show recommendations for a specific user
user = delusers.keys()[1]
print recommendations.top_matches(delusers, user)[0:10]
url = recommendations.get_recommendations(delusers, user)[0][1]
print recommendations.top_matches(recommendations.transform_prefs(delusers), url)
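# transform_prefs, used in the first example and here, simply swaps people and
# items in the nested dict so that the same top_matches call returns item-item
# similarities (for example, URLs similar to a given URL). A sketch:
def transform_prefs_sketch(prefs):
    result = {}
    for person in prefs:
        for item, rating in prefs[person].items():
            result.setdefault(item, {})[person] = rating
    return result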