def coverage(train,test,W, N):
    """Return the share of training items that get recommended to anyone.

    For every user in ``train`` the ranking produced by ``user_cf`` is cut
    to its ``N`` highest-scored entries; the result is the number of
    distinct items appearing in any of those top-N lists divided by the
    number of distinct items found in ``train``.

    Args:
        train: mapping of user -> iterable of interactions. The element at
            index 1 of each interaction is used as the item identifier.
        test: unused; kept so all metric functions share one signature.
        W: passed through unchanged to ``user_cf``.
        N: length of each user's recommendation list.

    Returns:
        A float ratio, or 0.0 when ``train`` contains no items at all
        (previously this case raised ZeroDivisionError).

    Note:
        ``K`` (forwarded to ``user_cf``) is read from module scope; it is
        not a parameter of this function.
    """
    recommended = set()
    catalogue = set()
    for user, interactions in train.items():
        catalogue.update(entry[1] for entry in interactions)
        # user_cf is expected to return a mapping item -> score; keep the
        # N best-scored items only.
        scores = user_cf(user, train, W, K)
        top_n = sorted(scores.items(), key=itemgetter(1), reverse=True)[:N]
        recommended.update(item for item, _score in top_n)
    if not catalogue:
        # Nothing to cover: avoid dividing by zero.
        return 0.0
    return len(recommended) / float(len(catalogue))
def precision(train,test, W, N):
    """Return the precision of top-N recommendations against ``test``.

    Only users present in both ``train`` and ``test`` are evaluated. For
    each of them the ranking from ``user_cf`` is cut to its ``N``
    highest-scored items and every item that also occurs in the user's
    test interactions counts as a hit. The result is
    ``hits / (N * evaluated_users)``.

    Args:
        train: mapping of user -> iterable of training interactions.
        test: mapping of user -> iterable of held-out interactions. The
            element at index 1 of each interaction is used as the item
            identifier.
        W: passed through unchanged to ``user_cf``.
        N: length of each user's recommendation list.

    Returns:
        A float ratio, or 0.0 when no user could be evaluated or ``N`` is
        0 (previously these cases raised ZeroDivisionError).

    Note:
        ``K`` (forwarded to ``user_cf``) is read from module scope; it is
        not a parameter of this function.
    """
    hits = 0
    recommended_total = 0
    for user in train:
        if user not in test:
            continue
        # A set gives constant-time membership checks for the hit test.
        held_out = {entry[1] for entry in test[user]}
        scores = user_cf(user, train, W, K)
        top_n = sorted(scores.items(), key=itemgetter(1), reverse=True)[:N]
        hits += sum(1 for item, _score in top_n if item in held_out)
        # The denominator grows by N even if fewer than N items were ranked.
        recommended_total += N
    if not recommended_total:
        return 0.0
    return hits / float(recommended_total)
def popularity(train,test, W, N):
    """Return the mean log-popularity of the items recommended to users.

    An item's popularity is the number of interactions in ``train`` that
    reference it. For every user in ``train`` the ranking from ``user_cf``
    is cut to its ``N`` highest-scored items and ``log(1 + popularity)``
    is averaged over all recommended items of all users.

    Args:
        train: mapping of user -> iterable of interactions. The element at
            index 1 of each interaction is used as the item identifier.
        test: unused; kept so all metric functions share one signature.
        W: passed through unchanged to ``user_cf``.
        N: length of each user's recommendation list.

    Returns:
        The average as a float, or 0.0 when nothing was recommended
        (previously this case raised ZeroDivisionError).

    Note:
        ``K`` (forwarded to ``user_cf``) is read from module scope; it is
        not a parameter of this function.
    """
    item_popularity = {}
    for interactions in train.values():
        for entry in interactions:
            item_popularity[entry[1]] = item_popularity.get(entry[1], 0) + 1

    log_sum = 0.0
    recommended_count = 0
    for user in train:
        scores = user_cf(user, train, W, K)
        # Restrict to the N best-scored items, as coverage() and
        # precision() do; N used to be ignored here, so the average ran
        # over every candidate instead of the actual recommendation list.
        top_n = sorted(scores, key=scores.get, reverse=True)[:N]
        for item in top_n:
            log_sum += math.log(1 + item_popularity[item])
            recommended_count += 1
    if not recommended_count:
        return 0.0
    return log_sum / recommended_count
Example #4
0
# Load the ratings and movies files with load_data (defined outside this
# chunk). The "ml-1m" paths presumably point at the MovieLens 1M dataset.
ratings = load_data("ml-1m/ratings.dat")
movies = load_data("ml-1m/movies.dat")


# k and seed are handed to splitdata together with M; M is not defined in
# this chunk and must come from elsewhere in the file.
k = 5
# The seed is the current wall-clock time, so it changes between runs.
seed = time.time()
train, test = splitdata(ratings, M, k, seed)


def list_to_dict(rating):
    """Map the first four positions of ``rating`` onto named fields.

    Positions 0 to 3 become the values of the keys "uid", "movieid",
    "rating" and "time" respectively; any further positions are ignored.
    """
    field_names = ("uid", "movieid", "rating", "time")
    return {name: rating[position] for position, name in enumerate(field_names)}


def convert_train(train):
    """Replace every rating record in ``train`` with its dict form, in place.

    Args:
        train: mapping of key -> iterable of rating records accepted by
            ``list_to_dict``.

    Returns:
        The same ``train`` object, whose values are now lists of the dicts
        produced by ``list_to_dict``.
    """
    # Walk a snapshot of the keys instead of dict.iteritems(): iteritems()
    # only exists on Python 2, while this form runs on Python 2 and 3 and
    # keeps iteration independent of the reassignment below.
    for key in list(train):
        train[key] = [list_to_dict(rating) for rating in train[key]]
    return train


# Build W from the training split with improve_cos (defined outside this
# chunk); presumably a user-user similarity structure -- confirm there.
print "improve_cos ..."
W = improve_cos(train)

print "user_cf ..."
# NOTE(review): `users`, `K` and `N` are not defined in this chunk. Each
# `rank` computed in this loop is overwritten by the next iteration and is
# not read by the metric calls below, which call user_cf again themselves.
for user in users:
    rank = user_cf(user[0], train, W, K)
    # print rank
# Print the evaluation metrics for top-N recommendation, one per line.
# `recall` is defined outside this chunk.
print precision(train, test, W, N)
print recall(train, test, W, N)
print coverage(train, test, W, N)
print popularity(train, test, W, N)