def plot_distance(data):
    """Plot the ``sim_distance`` score of every pair of keys in *data*.

    Each unordered pair of keys ``(p1, p2)`` is scored with
    ``sim_distance(data, p1, p2)`` and stored under the label ``"p1:p2"``.
    The scores are sorted in descending order, drawn as a line plot with one
    x tick per pair, and returned.

    Args:
        data: mapping whose keys are the entities to compare; it is passed
            unchanged to ``sim_distance``.

    Returns:
        The pandas ``Series`` of scores, indexed by ``"p1:p2"`` and sorted
        from highest to lowest.
    """
    pair_scores = {
        str(p1) + ':' + str(p2): sim_distance(data, p1, p2)
        for p1, p2 in combinations(data.keys(), 2)
    }
    S = Series(pair_scores).sort_values(ascending=False)

    ax = S.plot()
    # One tick per pair, labelled with the pair name and rotated vertically.
    ax.set_xticks(np.arange(len(S)))
    ax.set_xticklabels(S.index, rotation=90, ha='center', va='top')

    # The labels are Chinese, so an explicit font file is handed to every
    # text call. The path is Windows-specific.
    myfont = fm.FontProperties(fname=r'C:\Windows\Fonts\msyh.ttc')
    ax.set_title('共同爱好程度', fontproperties=myfont)
    # The score label belongs on the y axis. Previously set_xlabel was called
    # twice, so this label was overwritten and the y axis stayed unlabelled.
    ax.set_ylabel('相关度', fontproperties=myfont)
    ax.set_xlabel('人物关系', fontproperties=myfont)
    # NOTE(review): the legend text reads "Pearson correlation" although the
    # plotted values come from sim_distance -- confirm which is intended.
    ax.legend(('皮尔逊相关度', ), prop=myfont)
    # Leave room at the bottom for the rotated tick labels.
    plt.subplots_adjust(bottom=0.5)
    return S
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Quick manual exercise of the recommendations module.

Created on Wed Aug 23 07:49:55 2017

@author: andres
"""
from recommendations import critics, sim_distance, sim_pearson

# List every key of the critics mapping, one per line.
for critic in critics:
    print(critic)

# Testing sim_distance and sim_pearson on one pair; the results are discarded.
sim_distance(critics, 'Lisa Rose', 'Gene Seymour')
sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')
def test_DistEuclidiana(self):
    """Check sim_distance for 'Lisa Rose' vs 'Gene Seymour' on recommendations.critics.

    The expected score is a non-terminating float, so it is compared with
    assertAlmostEqual rather than exact equality; an exact comparison can fail
    on harmless floating-point rounding differences.
    """
    score = recommendations.sim_distance(
        recommendations.critics, 'Lisa Rose', 'Gene Seymour')
    self.assertAlmostEqual(score, 0.14814814814814814)
def test_sim_distance_no_common_films(self):
    """sim_distance for 'fulano' and 'sicrano' must be 0.

    Per the test name, these two entries of self.prefs share no films.
    """
    self.assertEqual(rec.sim_distance(self.prefs, 'fulano', 'sicrano'), 0)
# Disabled del.icio.us experiment, kept for reference:
#import pydelicious
#pydelicious.get_popular(tag='programming')
#from deliciousrec import *
#delusers=initializeUserDict('programming')
#delusers['tsegaran']={}
#fillIteams(delusers)
import recommendations

#print recommendations.critics['Lisa Rose']['Lady in the Water']
# Raw entries of the two critics compared below.
print(recommendations.critics['Lisa Rose'])
print(recommendations.critics['Gene Seymour'])

# Both similarity functions on the same pair.
print(recommendations.sim_distance(
    recommendations.critics, 'Lisa Rose', 'Gene Seymour'))
print(recommendations.sim_pearson(
    recommendations.critics, 'Lisa Rose', 'Gene Seymour'))

# Top three matches for Toby: default similarity first, then sim_distance.
print(recommendations.topMatches(recommendations.critics, 'Toby', n=3))
print(recommendations.topMatches(
    recommendations.critics, 'Toby', n=3,
    similarity=recommendations.sim_distance))

# Recommendations for Toby; the return values are not printed or stored.
recommendations.getRecommendations(recommendations.critics, 'Toby')
recommendations.getRecommendations(
    recommendations.critics, 'Toby',
    similarity=recommendations.sim_distance)

# Run topMatches on the output of transformPrefs, keyed by a film title.
movies = recommendations.transformPrefs(recommendations.critics)
#print movies
recommendations.topMatches(movies, 'Superman Returns')
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/2/26 16:19
# @Author  : Shuqi.qin
# @Site    :
# @File    : test.py
# @Software: PyCharm
import recommendations as rc

# Call each entry point once; the results are stored but never printed.
res1 = rc.sim_distance(rc.critics, 'Lisa Rose', 'Gene Seymour')
res2 = rc.sim_pearson(rc.critics, 'Lisa Rose', 'Gene Seymour')
res3 = rc.topMatches(rc.critics, 'Toby', n=3)
res4 = rc.getRecommendationsBaseOnPersonSimilarity(rc.critics, 'Toby')
print('over')

# pydelicious is imported only after the local checks above have finished.
import pydelicious
print(pydelicious.get_popular(tag='programming'))
def testSimDistanceForDifferentRatings(self):
    """sim_distance for 'joe' and 'jeff' in self.ratings must equal 0.5."""
    self.assertEqual(
        0.5, recommendations.sim_distance(self.ratings, 'joe', 'jeff'))
return tag_map if __name__ == '__main__': # stupid little demo import recommendations print 'getting users...' users = initializeUserDict('programming') users['thakis'] = {} users['ytamshg'] = {} users['tubanator'] = {} print 'getting their posts (takes a while) ...' fillItems(users) print 'Distances:' print recommendations.sim_distance(users, 'thakis', 'ytamshg') print recommendations.sim_distance(users, 'thakis', 'tubanator') print recommendations.sim_distance(users, 'tubanator', 'ytamshg') print 'Pearsons:' print recommendations.sim_pearson(users, 'thakis', 'ytamshg') print recommendations.sim_pearson(users, 'thakis', 'tubanator') print recommendations.sim_pearson(users, 'tubanator', 'ytamshg') print 'Recommendations:' print recommendations.getRecommendations(users, 'thakis')[0:10] print recommendations.getRecommendations(users, 'ytamshg')[0:10] print recommendations.getRecommendations(users, 'tubanator')[0:10]
#!/usr/bin/env python
# coding=utf-8
# filename : recommendations_test.py
# author   : Chase
# update   : 2016/09/10
import recommendations

# Pairwise sim_distance scores against Cathy.
print("distance between Leo and Cathy is:",
      recommendations.sim_distance(recommendations.critics, 'Leo', 'Cathy'))
print("distance between Susie and Cathy is:",
      recommendations.sim_distance(recommendations.critics, 'Susie', 'Cathy'))

# Three best matches for Antonio.
print("Antonio TopMatches:",
      recommendations.topMatches(recommendations.critics, 'Antonio', n=3))

# Recommendations for Leo.
print("Leo's recommendations:",
      recommendations.getRecommendations(recommendations.critics, 'Leo'))
import recommendations

# TODO: print the output as a table
# For every critic, print its name, then each other critic's name followed by
# the sim_distance score between the two.
for item in recommendations.critics:
    print(item)
    for value in recommendations.critics:
        if item != value:  # never compare a critic with itself
            print(value)
            result = recommendations.sim_distance(
                recommendations.critics, item, value)
            print(result)
def get_similarity_data(prefs, other_id):
    """Return ``[other_id, score]`` for one comparison against ``sub_id``.

    ``score`` is ``r.sim_distance(prefs, str(sub_id), str(other_id))``; both
    ids are converted to strings before the lookup. ``sub_id`` is not a
    parameter -- it is read from the enclosing scope.

    NOTE(review): the original comment describes this as a worker for a
    multi-threaded process defined elsewhere in the file -- confirm with the
    caller.
    """
    score = r.sim_distance(prefs, str(sub_id), str(other_id))
    return [other_id, score]
import recommendations

# TODO: print the output as a table
# For every critic, print its name, then each other critic's name followed by
# the sim_distance score between the two.
for item in recommendations.critics:
    print(item)
    for value in recommendations.critics:
        if item != value:  # never compare a critic with itself
            print(value)
            result = recommendations.sim_distance(
                recommendations.critics, item, value)
            print(result)
'''
Created on 2015-6-12

@author: XXYF18
'''
from recommendations import (
    critics,
    sim_distance,
    sim_pearson,
    topMatches,
    getRecommendations,
)

#critics['Lisa Rose']['Lady in the Water']
#
#critics['Toby']['Snakes on a Plane']=4.5
#critics['Toby']

import pydelicious
# The result of this call is discarded.
pydelicious.get_popular(tag='programming')

# Similarity of one pair under both measures.
print(sim_distance(critics, 'Lisa Rose', 'Gene Seymour'))
print(sim_pearson(critics, 'Lisa Rose', 'Gene Seymour'))

# Matches and recommendations for Toby; the last call passes sim_distance.
print(topMatches(critics, 'Toby', n=3))
print(getRecommendations(critics, 'Toby'))
print(getRecommendations(critics, 'Toby', similarity=sim_distance))
if __name__ == '__main__':
    # Small demo: build a user dict for one tag, add three named users, fill
    # in their items, then print similarities and recommendations for them.
    import recommendations

    print('getting users...')
    users = initializeUserDict('programming')
    for extra_user in ('thakis', 'ytamshg', 'tubanator'):
        users[extra_user] = {}

    print('getting their posts (takes a while) ...')
    fillItems(users)

    # The same three pairs are scored with both similarity functions.
    user_pairs = (
        ('thakis', 'ytamshg'),
        ('thakis', 'tubanator'),
        ('tubanator', 'ytamshg'),
    )

    print('Distances:')
    for first, second in user_pairs:
        print(recommendations.sim_distance(users, first, second))

    print('Pearsons:')
    for first, second in user_pairs:
        print(recommendations.sim_pearson(users, first, second))

    # Only the first ten recommendations per user are shown.
    print('Recommendations:')
    for name in ('thakis', 'ytamshg', 'tubanator'):
        print(recommendations.getRecommendations(users, name)[0:10])
# encoding: utf-8
'''
Created on 2015-07-19

@author: HCY
'''
from recommendations import critics, sim_distance, sim_pearson
import recommendations

#print(critics['Lisa Rose']['Lady in the Water'])
# Raw entry stored under the 'Hcy' key.
print(critics['Hcy'])

# Similarity of one pair under both measures.
print(sim_distance(critics, 'Lisa Rose', 'Gene Seymour'))
print(sim_pearson(critics, 'Lisa Rose', 'Gene Seymour'))

# Top three matches for 'Hcy' under each measure, then recommendations.
for measure in (sim_pearson, sim_distance):
    print(recommendations.topMatches(critics, 'Hcy', n=3, similarity=measure))
print(recommendations.getRecommendations(critics, 'Hcy'))
import recommendations
import operator

# rating_distance[a][b] holds the sim_distance score between critics a and b
# for every ordered pair with a != b.
rating_distance = {}
for person1 in recommendations.critics:
    ratings = {}
    for person2 in recommendations.critics:
        if person1 == person2:
            continue
        print(person1 + ', ' + person2)
        ratings[person2] = recommendations.sim_distance(
            recommendations.critics, person1, person2)
    rating_distance[person1] = ratings

#rating_distance.sort()
print(rating_distance)

# Sort each critic's scores from highest to lowest and keep the first entry,
# i.e. the (name, score) pair with the highest score for that critic.
closest_person = []
for person in rating_distance.keys():
    scored_others = rating_distance.get(person).items()
    sorted_distance = sorted(
        scored_others, key=operator.itemgetter(1), reverse=True)
    closest_person.append({person: sorted_distance[0]})
print(closest_person)
from recommendations import (
    critics,
    sim_distance,
    sim_pearson,
    pearsonTopMatches,
    distanceTopMatches,
    getRecommendations,
)

print(critics['Lisa Rose']['Lady in the Water'])

# Overwrite one of Toby's ratings in place; every call below sees the change.
critics['Toby']['Snakes on a Plane'] = 5.0
print(critics['Toby'])

# Score one pair with each measure and print it under a descriptive key.
result = {
    'euclidean distance socre':
        sim_distance(critics, 'Lisa Rose', 'Gene Seymour'),
}
print(result)
result = {
    'Pearson correlation score':
        sim_pearson(critics, 'Lisa Rose', 'Gene Seymour'),
}
print(result)

# Three best matches for Toby from each top-matches helper.
print(distanceTopMatches(critics, 'Toby', n=3))
print(pearsonTopMatches(critics, 'Toby', n=3))

# Recommendations for Toby: default similarity, then sim_distance.
print('-------------------------------------')
print(getRecommendations(critics, 'Toby'))
print('-------------------------------------')
print('-------------------------------------')
print(getRecommendations(critics, 'Toby', similarity=sim_distance))
# Split prefs into disjoint train (first 90%) and test (remaining 10%) sets.
# list() makes a shuffleable copy of the keys.
movies = list(prefs.keys())
random.shuffle(movies)
# Use ONE cut point for both slices. Previously the test slice started at
# int(0.1 * len(movies)), so it overlapped most of the training set.
split_at = int(0.9 * len(movies))
movies_train, movies_test = movies[:split_at], movies[split_at:]
train = {m: prefs[m] for m in movies_train}
test = {m: prefs[m] for m in movies_test}

for movie in test:
    # With no training movies there is nothing to compare against, and
    # indexing the empty score lists below would raise IndexError.
    if not train:
        break

    sim_distances = []
    sim_pearsons = []
    for other_movie in train:
        # Calculate distance using euclidean distance
        sim_distances.append(
            (r.sim_distance(prefs, movie, other_movie), other_movie))
        # Calculate similarity using pearson
        sim_pearsons.append(
            (r.sim_pearson(prefs, movie, other_movie), other_movie))

    # distance sort ascending
    # NOTE(review): this picks the LOWEST sim_distance value. If
    # r.sim_distance returns a similarity (higher = closer) rather than a raw
    # distance, this should sort descending -- confirm against r.sim_distance.
    sim_distances.sort()
    # similarity sort descending
    sim_pearsons.sort(reverse=True)

    # select 1st top of the list
    sim_most_related_movie = sim_distances[0][1]
    pear_most_related_movie = sim_pearsons[0][1]

    # Compare
    print('Using euclidean distance : Actual movie = {}, Predicted movie = {}'.format(movie, sim_most_related_movie))
    print('Using pearson similarity : Actual movie = {}, Predicted movie = {}'.format(movie, pear_most_related_movie))
def test_DistEuclidiana(self):
    """Check sim_distance for 'Lisa Rose' vs 'Gene Seymour' on recommendations.critics.

    The expected score is a non-terminating float, so it is compared with
    assertAlmostEqual rather than exact equality; an exact comparison can fail
    on harmless floating-point rounding differences.
    """
    score = recommendations.sim_distance(
        recommendations.critics, 'Lisa Rose', 'Gene Seymour')
    self.assertAlmostEqual(score, 0.14814814814814814)
def testSimDistanceForSameRatings(self):
    """After setting jeff's 'Ford' rating to 1, sim_distance(joe, jeff) must be 1."""
    # Mutates the shared fixture entry before scoring.
    self.ratings['jeff']['Ford'] = 1
    self.assertEqual(
        1, recommendations.sim_distance(self.ratings, 'joe', 'jeff'))
import recommendations

# 'ming' is compared against the same three critics under both measures.
print("distance")
for other in ('lin', 'michael', 'mick'):
    print(recommendations.sim_distance(recommendations.critics, 'ming', other))

print("pearson")
for other in ('lin', 'michael', 'mick'):
    print(recommendations.sim_pearson(recommendations.critics, 'ming', other))

print("top match")
print(recommendations.topMatches(recommendations.critics, 'ming', n=3))

print("recommendation")
print(recommendations.getRecommendations(recommendations.critics, 'ming'))

# Item-based view: run the same helpers on the output of transformPrefs.
print("recommend items")
items = recommendations.transformPrefs(recommendations.critics)
print(recommendations.topMatches(items, 'lady in the water', n=3))
print(recommendations.sim_pearson(
    items, 'lady in the water', 'snake on a plane'))

# calculateSimilarItems with its default similarity, then with sim_pearson.
print("item similarity use distance")
print(recommendations.calculateSimilarItems(recommendations.critics))
print('item similarity use pearson')
print(recommendations.calculateSimilarItems(
    recommendations.critics, similarity=recommendations.sim_pearson))

print('recommendation base on items')
itemsim = recommendations.calculateSimilarItems(recommendations.critics)
# don't compare me to myself if other == person: continue sim = similarity(prefs, person, other) # ignore scores of zero or lower if sim <= 0: continue for item in prefs[other]: # only score movies I haven't seen yet if item not in prefs[person] or prefs[person][item] == 0: # Similarity * Score totals.setdefault(item, 0) totals[item] += prefs[other][item] * sim # Sum of similarities simSums.setdefault(item, 0) simSums[item] += sim # Create the normalized list rankings = [(total / simSums[item], item) for item, total in totals.items()] # Return the sorted list rankings.sort() rankings.reverse() return rankings if __name__ == '__main__': d = sim_distance(critics, 'Lisa Rose', 'Claudia Puig') print d d = getRecommendations(critics,'Toby') print d
import recommendations
reload(recommendations)

# Finds distances for all critics:
# NOTE(review): the bare name `critics` (and `deliciousrec` further down) is
# not defined in the visible code -- presumably left over from an interactive
# session; confirm before running as a script.
distances = []
for p1 in critics:
    for p2 in critics:
        if p1 == p2:
            continue
        score = recommendations.sim_distance(recommendations.critics, p1, p2)
        distances.append((score, p1, p2))

# Ascending sort followed by an in-place reversal.
distances.sort()
distances.reverse()
distances

###########################
reload(recommendations)
recommendations.sim_distance(recommendations.critics, 'Lisa Rose', 'Gene Seymour')
recommendations.sim_pearson(recommendations.critics, 'Lisa Rose', 'Gene Seymour')
recommendations.topMatches(recommendations.critics, 'Toby', n=3)

######################################################
# del.icio.us:
######################################################
delusers = deliciousrec.initializeUserDict('gratis')
# Add yourself to the dictionary if you use delicious
delusers['lpagliari'] = {}
deliciousrec.fillItems(delusers)
delusers

###########################
# pick random user and find users similar to him/her:
#!/usr/bin/env python
from math import sqrt

# Euclidean distance between the points (5, 4) and (4, 1).
euclidean_distance = sqrt((5 - 4) ** 2 + (4 - 1) ** 2)
print(euclidean_distance)

# Adding 1 to the denominator avoids dividing by zero when the distance is 0.
inversed_ed = 1 / (1 + sqrt((5 - 4) ** 2 + (4 - 1) ** 2))
print(inversed_ed)

import recommendations

# Print the library score for one pair of critics.
sim_distance_result = recommendations.sim_distance(
    recommendations.critics, 'Lisa Rose', 'Gene Seymour')
print(sim_distance_result)
def test_sim_distance(self):
    """sim_distance for 'fulano' and 'beltrano' must approximately equal the pinned float."""
    self.assertAlmostEqual(
        rec.sim_distance(self.prefs, 'fulano', 'beltrano'),
        0.47516409872148213,
    )