def coverage(train, test, W, N):
    """Return the share of training items that get recommended to some user.

    For every user in ``train`` the candidate scores returned by
    ``user_cf(user, train, W, K)`` are sorted by score (descending) and the
    first ``N`` items are kept. The result is the number of distinct items
    kept across all users divided by the number of distinct items seen in
    ``train``.

    Args:
        train: mapping of user -> iterable of records; element ``[1]`` of
            each record is used as the item id.
        test: unused; kept so all metric functions share one signature.
        W: forwarded unchanged to ``user_cf``.
        N: number of top-scored recommendations kept per user.

    Returns:
        The ratio as a float, or ``0.0`` when ``train`` contains no items.

    Note:
        Reads the module-level ``K`` and passes it to ``user_cf``.
    """
    recommend_items = set()
    all_items = set()
    for user, records in train.items():
        all_items.update(record[1] for record in records)
        rank = user_cf(user, train, W, K)
        # sorted() is stable, so ties keep the order user_cf produced them in.
        top_n = sorted(rank.items(), key=itemgetter(1), reverse=True)[:N]
        recommend_items.update(item for item, _ in top_n)
    if not all_items:
        # Empty training data: report zero coverage instead of dividing by 0.
        return 0.0
    # "* 1.0" forces true division under Python 2 as well.
    return len(recommend_items) / (len(all_items) * 1.0)
def precision(train, test, W, N):
    """Return the precision of the top-N recommendations against ``test``.

    Only users present in both ``train`` and ``test`` are evaluated. For each
    such user the candidates from ``user_cf(user, train, W, K)`` are sorted
    by score (descending), the first ``N`` are kept, and every kept item that
    also occurs in the user's test records counts as a hit. Each evaluated
    user adds ``N`` to the denominator, even if fewer than ``N`` candidates
    were available.

    Args:
        train: mapping of user -> training records.
        test: mapping of user -> iterable of records; element ``[1]`` of
            each record is used as the item id.
        W: forwarded unchanged to ``user_cf``.
        N: number of top-scored recommendations kept per user.

    Returns:
        ``hits / (N * evaluated_users)`` as a float, or ``0.0`` when the
        denominator is zero (no evaluated users, or ``N == 0``).

    Note:
        Reads the module-level ``K`` and passes it to ``user_cf``.
    """
    hit = 0
    total = 0  # named "total" so the builtin all() is not shadowed
    for user in train:
        if user not in test:
            continue
        # A set gives O(1) membership checks for the hit test below.
        relevant = {record[1] for record in test[user]}
        rank = user_cf(user, train, W, K)
        top_n = sorted(rank.items(), key=itemgetter(1), reverse=True)[:N]
        hit += sum(1 for item, _ in top_n if item in relevant)
        total += N
    if not total:
        # Nothing was evaluated: avoid ZeroDivisionError.
        return 0.0
    # "* 1.0" forces true division under Python 2 as well.
    return hit / (total * 1.0)
def popularity(train, test, W, N):
    """Return the mean log-popularity of the top-N recommended items.

    An item's popularity is the number of training records that mention it.
    For every user in ``train`` the candidates from
    ``user_cf(user, train, W, K)`` are sorted by score (descending), the
    first ``N`` are kept, and ``log(1 + popularity)`` of each kept item is
    averaged over all kept (user, item) pairs.

    Args:
        train: mapping of user -> iterable of records; element ``[1]`` of
            each record is used as the item id.
        test: unused; kept so all metric functions share one signature.
        W: forwarded unchanged to ``user_cf``.
        N: number of top-scored recommendations kept per user.

    Returns:
        The average as a float, or ``0.0`` when nothing was recommended.

    Raises:
        KeyError: if ``user_cf`` returns an item that never occurs in
            ``train``.

    Note:
        Reads the module-level ``K`` and passes it to ``user_cf``.
    """
    item_popularity = {}
    for records in train.values():
        for record in records:
            item_id = record[1]
            item_popularity[item_id] = item_popularity.get(item_id, 0) + 1

    ret = 0.0
    n = 0
    for user in train:
        rank = user_cf(user, train, W, K)
        # Keep only the N best-scored items, matching coverage()/precision();
        # previously N was ignored and every candidate was averaged in.
        top_n = sorted(rank.items(), key=lambda pair: pair[1], reverse=True)[:N]
        for item, _ in top_n:
            ret += math.log(1 + item_popularity[item])
            n += 1
    if not n:
        # No recommendations at all: avoid ZeroDivisionError.
        return 0.0
    return ret / n
# Driver script: load the MovieLens-1M files, split the ratings, build the
# user-similarity structure and print the four evaluation metrics.
# NOTE(review): M, K, N and users are not defined in this part of the file;
# they are expected to be provided elsewhere in the module.
ratings = load_data("ml-1m/ratings.dat")
movies = load_data("ml-1m/movies.dat")
k = 5
seed = time.time()  # time-based seed: the split differs from run to run
train, test = splitdata(ratings, M, k, seed)


def list_to_dict(rating):
    """Turn a positional rating record into a dict with named fields.

    Positions 0..3 of ``rating`` become the keys ``uid``, ``movieid``,
    ``rating`` and ``time`` respectively.
    """
    return {
        "uid": rating[0],
        "movieid": rating[1],
        "rating": rating[2],
        "time": rating[3],
    }


def convert_train(train):
    """Replace every record list in ``train`` by a list of field dicts.

    ``train`` is modified in place and also returned.
    """
    # Iterate over a snapshot of the keys: works on Python 2 and 3 (the
    # original dict.iteritems() exists only on Python 2) and avoids reusing
    # the module-level name ``k`` as a loop variable.
    for key in list(train):
        train[key] = [list_to_dict(rating) for rating in train[key]]
    return train


# Single-argument print(...) behaves identically as a Python 2 print
# statement and as a Python 3 function call.
print("improve_cos ...")
W = improve_cos(train)
print("user_cf ...")
for user in users:
    # The result is discarded; the debug print below is disabled.
    rank = user_cf(user[0], train, W, K)
    # print(rank)
print(precision(train, test, W, N))
print(recall(train, test, W, N))
print(coverage(train, test, W, N))
print(popularity(train, test, W, N))