예제 #1
0
def userExperiment1():

    distProdId = getdistinctProduct(isTrain)  # has the distinct product id's
    n = len(distProdId)

    distUserId = getdistinctUser(isTrain)  # has the distinct product id's
    userLen = len(distUserId)

    print "Distinct Products Count :", n
    print "Distinct users count :", userLen

    # function calculates the pearson coeff
    pairwiseUserSimilarity(30)  # 30 Common Products Per user

    #function calcualtes the recommendation success
    userBasedCollabFiltering()
예제 #2
0
def userExperiment2():

    # Function below removes the data where the sentiment analysis prediction doesnt match with the actual rating
    resetIgnore()

    distProdId = getdistinctProduct(isTrain)
    n = len(distProdId)

    distUserId = getdistinctUser(isTrain)
    userLen = len(distUserId)

    print "Distinct Products Count :", n
    print "Distinct users count :", userLen

    # function calculates the pearson coeff
    pairwiseUserSimilarity(30)  # 30 Common Products Per User

    #function calcualtes the recommendation success
    userBasedCollabFiltering()
def experiment1():

	distProdId = getdistinctProduct(isTrain) # has the distinct product id's
	n = len(distProdId)

	distUserId = getdistinctUser(isTrain) # has the distinct product id's
	userLen = len(distUserId)

	print "Distinct Products Count :", n
	print "Distinct users count :", userLen
	

	# funtion deletes existing similarity already if inserted
	deleteSimilarityFromDB()
	
	# function calculates the pearson coeff
	pairwiseProdSimilarity(30) # 30 Common users Per Product

	#function calculates the recommendation success
	productBasedCollabFiltering()
	comparisonPercentageRecVSTest()
def experiment2():

	# Function below removes the data where the sentiment analysis prediction doesnt match with the actual rating 
	resetIgnore()

	distProdId = getdistinctProduct(isTrain)
	n = len(distProdId)

	distUserId = getdistinctUser(isTrain)
	userLen = len(distUserId)

	print "Distinct Products Count :", n
	print "Distinct users count :", userLen

	
	# funtion deletes existing similarity already if inserted
	deleteSimilarityFromDB()
	
	# function calculates the pearson coeff
	pairwiseProdSimilarity(30) # 30 Common users Per Product

	#function calcualtes the recommendation success
	productBasedCollabFiltering()
	comparisonPercentageRecVSTest()
import math

from sets import Set
import math
import operator



# Module-level settings and data. These statements run once, when the module is loaded.
isTrain = True # flag handed to the query helpers; per the original note, True fetches from the train table
expId = 1 # experiment id -- NOTE(review): its consumers are not visible next to this definition; confirm usage

distProdId = getdistinctProduct(isTrain) # the distinct product ids
n = len(distProdId) # number of distinct products


distUserId = getdistinctUser(isTrain) # the distinct user ids
userLen = len(distUserId) # number of distinct users


#function to calculate item-based CF for different length of common users
def pairwiseProdSimilarity(commonUserInterLen):
	"""Collect, per distinct product, the users/ratings needed for item-based similarity.

	The part of the function visible here only loads data: for every index
	in ``range(0, n)`` it stores the result of ``getUsersOfProduct`` for the
	corresponding entry of the module-level ``distProdId``.

	commonUserInterLen -- not used by the visible statements.

	NOTE(review): the body looks truncated. ``userResult1``, ``userResult2``
	and ``productToUsers`` are initialised but never used and nothing is
	returned; presumably the similarity computation followed -- confirm
	against the complete source.
	"""
	userResult1 = {}  
	userResult2 = {}
	productToUsers = {}
	productToUserResult = {}  # product index -> result of getUsersOfProduct

	# gets the users and ratings for each distinct product from the train data
	for i in range(0,n):
		userResult = getUsersOfProduct(distProdId[i], isTrain)
		# stores the product index and the corresponding user and rating in the dictionary
		productToUserResult[i] = userResult 
예제 #6
0
def pairwiseUserSimilarity(commonProdInterLen):
    deleteUserSimilarityFromDB()
    distProdId = getdistinctProduct(isTrain)  # has the distinct product id's
    n = len(distProdId)

    distUserId = getdistinctUser(isTrain)  # has the distinct product id's
    userLen = len(distUserId)

    print userLen

    print "distinct Users Cnt ", userLen

    productResult1 = {}
    productResult2 = {}
    userToProducts = {}
    userToProductResult = {}

    for u in range(0, userLen):
        productResult = getProductOfUsers(distUserId[u], isTrain)
        userToProductResult[u] = productResult
        productSet = set(productResult.keys())
        userToProducts[u] = productSet

    #print productResult

    #print userToProductResult

    #print userToProducts

    for u in range(0, userLen):

        if u % 1000 == 0:
            print "done  ", u

        productResult1 = userToProducts[u]

        for v in range(u + 1, userLen):

            productResult2 = userToProducts[v]
            commonProducts = productResult1.intersection(productResult2)
            commonProductsLen = len(commonProducts)
            # if commonProductsLen > 0:
            # 	print  commonProductsLen

            if commonProductsLen > commonProdInterLen:
                #print
                r_pu_sum = 0
                r_pv_sum = 0

                for p in commonProducts:
                    try:
                        r_pu_sum = r_pu_sum + userToProductResult[u][p]
                        r_pv_sum = r_pv_sum + userToProductResult[v][p]
                    except Exception, e:
                        continue

                numerator = 0
                factor1 = 0
                factor2 = 0
                denominator1 = 0
                denominator2 = 0

                ru_mean = float(r_pu_sum) / commonProductsLen
                rv_mean = float(r_pv_sum) / commonProductsLen

                for p in commonProducts:
                    try:
                        r_pu = userToProductResult[u][p]
                        r_pv = userToProductResult[v][p]

                        factor1 = r_pu - ru_mean
                        factor2 = r_pv - rv_mean

                        numerator = numerator + (factor1 * factor2)

                        denominator1 = denominator1 + (factor1 * factor1)
                        denominator2 = denominator2 + (factor2 * factor2)

                    except Exception, e:
                        continue

                if denominator1 == 0 or denominator2 == 0:
                    sim_u_v = 1
                else:
                    sim_u_v = numerator / (math.sqrt(denominator1) *
                                           math.sqrt(denominator2))

                # if distUserId[u] == 'A10FL3MC9QFZ7P' and distUserId[v] == 'A1H5CRR7Z4SV9F':
                # 	print r_pu_sum,r_pv_sum,commonProducts
                # 	print distUserId[u],distUserId[v],commonProductsLen,ru_mean,rv_mean,denominator1,denominator2,numerator
                # 	print "Similarity: ",u,v,sim_u_v

                insertUserSimilarity(distUserId[u], distUserId[v], sim_u_v)
예제 #7
0
def userBasedCollabFiltering():

    #print "distinct users",userLen

    distProdId = getdistinctProduct(isTrain)  # has the distinct product id's
    n = len(distProdId)

    distUserId = getdistinctUser(isTrain)  # has the distinct product id's
    userLen = len(distUserId)

    result = {}
    productsToUser = {}
    productToUserResult = {}

    totalRecommendations = 0
    totalHits = 0

    for i in range(0, n):
        userResult = getUsersOfProduct(distProdId[i], isTrain)
        productToUserResult[i] = userResult
        userSet = set(userResult.keys())
        productsToUser[i] = userSet

    similarUserMap = {}
    for j in range(0, userLen):
        similarUserResult = getSimilarUser(distUserId[j])
        similarUserMap[j] = similarUserResult

    # for all products
    for i in range(0, n):

        usersRatedInTest = getUsersOfProduct(distProdId[i], False)
        userSetTest = set(usersRatedInTest.keys())

        numRecommendations = 0
        numHits = 0
        if i % 10000 == 0:
            print "done", i

        recommendation = {}
        usersRated = productsToUser[i]
        usersRatedResult = productToUserResult[i]

        # for all users
        for u in range(0, userLen):
            # already rated
            if u in usersRated:
                continue

            similarToUser_u = similarUserMap[u]
            commonUserIds = usersRated.intersection(similarToUser_u.keys())

            if len(commonUserIds) > 0:

                numerator = float(0)
                denominator = float(0)

                for uId in commonUserIds:
                    simScore = similarToUser_u[uId]

                    if simScore == 0:
                        continue
                    try:
                        numerator += simScore * usersRatedResult[uId]
                        denominator += math.fabs(simScore)
                    except:
                        continue

                # no new product to recommend
                if denominator == 0:
                    continue

                recommendationScore = numerator / denominator
                recommendation[u] = recommendationScore

        if expId == 1:
            #print recommendation
            for j in recommendation.keys():

                if recommendation[j] > 0:
                    numRecommendations += 1
                    if distUserId[j] in userSetTest:
                        #print "Recommnding ",j
                        numHits += 1

        totalRecommendations += numRecommendations
        totalHits += numHits

    try:
        rec_success = 0
        if totalRecommendations == 0:
            rec_success = 0
        else:
            rec_success = float(totalHits) / float(totalRecommendations)

        print "Total Rec : ", totalRecommendations, " Total Hits", totalHits, " Recomendation Success : ", rec_success
    except:
        print " "
예제 #8
0
from queryData import resetIgnore
from queryData import insertUserSimilarity
from queryData import getSimilarUser
from queryData import deleteUserSimilarityFromDB

from sets import Set
import math
import operator

# Module-level settings and data. These statements run once, when the module is loaded.
isTrain = True  # flag handed to the query helpers (getdistinctProduct / getdistinctUser below)
expId = 1  # experiment id -- NOTE(review): confirm which code paths read this value

distProdId = getdistinctProduct(isTrain)  # has the distinct product ids
n = len(distProdId)  # number of distinct products

distUserId = getdistinctUser(isTrain)  # has the distinct user ids
userLen = len(distUserId)  # number of distinct users

#print userLen


def pairwiseUserSimilarity(commonProdInterLen):
    deleteUserSimilarityFromDB()
    distProdId = getdistinctProduct(isTrain)  # has the distinct product id's
    n = len(distProdId)

    distUserId = getdistinctUser(isTrain)  # has the distinct product id's
    userLen = len(distUserId)

    print userLen