def recallWithParams(filePrefix, rootPath, tru, tri, teu, topN, featureK, model):
    """Load saved factor matrices from disk and hand them to ``recallAll``.

    Args:
        filePrefix: Prefix prepended to ``usersVector.npy`` and
            ``itemsVector.npy`` to locate the saved matrices.
        rootPath: Prefix of the theta file, which is read from
            ``rootPath + 'final.gamma' + str(featureK)`` when ``hasLDA``
            is truthy.
        tru: Path given to ``cu.loadData(..., 'user')`` for the training set.
        tri: Path given to ``cu.loadData(..., 'item')`` for the training set.
        teu: Path given to ``cu.loadData(..., 'user')`` for the test set.
        topN: Forwarded unchanged to ``recallAll``.
        featureK: Appended to the theta file name and forwarded to
            ``recallAll``.
        model: Forwarded unchanged to ``recallAll``.

    Returns:
        None. Whatever ``recallAll`` returns is discarded.
    """
    userFactorsPath = filePrefix + 'usersVector.npy'
    itemFactorsPath = filePrefix + 'itemsVector.npy'
    gammaPath = rootPath + 'final.gamma' + str(featureK)

    userFactors = utils.loadNumpyMatrix(userFactorsPath)
    itemFactors = utils.loadNumpyMatrix(itemFactorsPath)

    trainByUser, _trainRatings = cu.loadData(tru, 'user')
    trainByItem, itemSideRatings = cu.loadData(tri, 'item')

    # NOTE(review): hasLDA is neither a parameter nor a local of this
    # function, so it must exist as a module-level name; otherwise this
    # line raises NameError. Confirm against the rest of the file.
    topicMatrix = cu.loadTheta(gammaPath) if hasLDA else []

    itemCount = len(trainByItem.keys())

    # Drop the reference to the unused second value returned for the item
    # file before the test set is loaded.
    del itemSideRatings

    testByUser, _testRatings = cu.loadData(teu, 'user')

    # Bias vectors are not loaded from disk; recallAll receives None for both.
    userBiases = None
    itemBiases = None

    recallAll(trainByUser, trainByItem, testByUser, itemCount, topN,
              userFactors, itemFactors, userBiases, itemBiases,
              featureK, model, topicMatrix)
def solve(argv, alpha, Lambdau, Lambdav, Lambdauv, epochs):
    """Load one dataset and run ``ra.train`` on it.

    Positional layout of ``argv`` as read here:
        0  dataset name (selects the rating scale)
        3  featureK
        4  training file loaded with ``cu.loadData(..., 'user')``
        5  training file loaded with ``cu.loadData(..., 'item')``
        6  test file loaded with ``cu.loadData(..., 'user')``
        8  theta path (only read when RAPARE is enabled)
        9  output file prefix; ``str(featureK)`` is appended to it
    Slots 1, 2 and 7 are not used by this function.

    Args:
        argv: Sequence of settings, indexed as described above.
        alpha, Lambdau, Lambdav, Lambdauv, epochs: Forwarded unchanged to
            ``ra.train``.

    Returns:
        None.
    """
    print(argv)

    # Hard-coded switches for this configuration.
    RAPMF = True
    RAPARE = False
    hasLDA = False
    save = True

    dataset = argv[0]
    featureK = argv[3]
    tru = argv[4]
    tri = argv[5]
    teu = argv[6]
    thetaPath = argv[8]
    filePrefix = argv[9]

    scale = 1 if dataset in ('delicious', 'fm', 'blog') else 5
    topN = [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

    print('load train data...')
    trainUser, trainData = cu.loadData(tru, 'user')
    trainItem, itemSideData = cu.loadData(tri, 'item')
    userNum = len(trainUser.keys())
    itemNum = len(trainItem.keys())
    # The second value returned for the item file is never used; release it.
    del itemSideData

    print('load test data...')
    testUser, testData = cu.loadData(teu, 'user')

    summary = ('User No.', userNum, 'Item No.', itemNum,
               'ratings in Train:', len(trainData),
               'rating in Test:', len(testData))
    # Space-joined str() of each part: the same text a comma-separated
    # print statement emits.
    print(' '.join(map(str, summary)))

    mean = 0.0

    if not (RAPARE or RAPMF):
        return

    if RAPARE:
        print('load theta...')
        theta = cu.loadTheta(thetaPath)
    else:
        theta = []

    # No initial factors or biases are supplied to the trainer.
    initialUsersVector = None
    initialItemsVector = None
    initialUsersBias = None
    initialItemsBias = None

    userRatingCounts = utils.countRatings(trainUser)
    itemRatingCounts = utils.countRatings(trainItem)
    outputPrefix = filePrefix + str(featureK)

    ra.train(mean, userNum, itemNum, featureK, trainData, testData, epochs,
             alpha, Lambdau, dataset, userRatingCounts, itemRatingCounts,
             True, initialUsersVector, initialItemsVector, initialUsersBias,
             initialItemsBias, Lambdauv, theta, outputPrefix, save, scale,
             Lambdav, hasLDA, tru, tri, teu, topN)
def recallWithParamsOnLearning(tru, tri, teu, topN, featureK, model, usersVector, itemsVector, theta):
    """Run ``recallAll`` on factor matrices that are already in memory.

    Unlike a disk-based evaluation, the caller supplies ``usersVector``,
    ``itemsVector`` and ``theta`` directly; only the train/test data files
    are read here.

    Args:
        tru: Path given to ``cu.loadData(..., 'user')`` for the training set.
        tri: Path given to ``cu.loadData(..., 'item')`` for the training set.
        teu: Path given to ``cu.loadData(..., 'user')`` for the test set.
        topN, featureK, model, usersVector, itemsVector, theta: Forwarded
            unchanged to ``recallAll``.

    Returns:
        None. Whatever ``recallAll`` returns is discarded.
    """
    trainByUser, _trainRatings = cu.loadData(tru, 'user')
    trainByItem, itemSideRatings = cu.loadData(tri, 'item')
    itemCount = len(trainByItem.keys())

    # Drop the reference to the unused second value returned for the item
    # file before the test set is loaded.
    del itemSideRatings

    testByUser, _testRatings = cu.loadData(teu, 'user')

    # Bias vectors are not used on this path; recallAll receives None for both.
    userBiases = None
    itemBiases = None

    recallAll(trainByUser, trainByItem, testByUser, itemCount, topN,
              usersVector, itemsVector, userBiases, itemBiases,
              featureK, model, theta)
userVector = userVectors[userID - 1] itemVector = itemVectors[itemID - 1] predictedRating = mf.predictRating(userVector, itemVector, 0, 0, 0, 5) error = mf.computeError(rating, predictedRating) mae += abs(error) print mae / len(testData) if __name__ == '__main__': MF = True CTR = False model = False featureK = 50 data = 'google75' rootPath = 'ctrdata/' + data + '/' teu = rootPath + data + '-Test-User.dat' testData, testData1 = cu.loadData(teu, 'user') filePrefix = 'ctrdata/google75/500.05_0.5_0.01/RaP_' filePrefix = 'ctrdata/google75/2000.1_0.1/MF_' #filePrefix = 'ctrdata/google75/2000.5_0.5_0.01/noLDA_RaP_' usersVectorFile = filePrefix + 'usersVector.npy' itemsVectorFile = filePrefix + 'itemsVector.npy' userVectors = utils.loadNumpyMatrix(usersVectorFile) itemVectors = utils.loadNumpyMatrix(itemsVectorFile) usersBias = None #utils.loadNumpyMatrix(usersBiasFile) itemsBias = None #utils.loadNumpyMatrix(itemsBiasFile) computeMAE(testData1, userVectors, itemVectors)
def solve(argv, alpha, Lambdau, Lambdav, Lambdauv, epochs):
    """Load one dataset and train whichever models the local switches enable.

    With the switches as set below only the RAPARE path runs: theta is
    loaded from ``thetaPath`` and ``ra.train`` is called. The MF and CTR
    paths (both via ``mf.train``) are present but switched off.

    Positional layout of ``argv`` as read here:
        0   dataset name (selects the rating scale)
        4   featureK
        5   training file loaded with ``cu.loadData(..., 'user')``
        6   training file loaded with ``cu.loadData(..., 'item')``
        7   test file loaded with ``cu.loadData(..., 'user')``
        9   theta path
        10  output file prefix; ``str(featureK)`` is appended to it
    Slots 1, 2, 3 and 8 are not used by this function.

    Args:
        argv: Sequence of settings, indexed as described above.
        alpha, Lambdau, Lambdav, epochs: Forwarded unchanged to the
            ``mf.train`` / ``ra.train`` calls.
        Lambdauv: Forwarded unchanged to ``ra.train`` only.

    Returns:
        None.
    """
    print(argv)

    # Hard-coded model switches for this configuration.
    MF = False
    CTR = False
    RAPMF = False
    RAPARE = True
    hasLDA = False
    save = True

    dataset = argv[0]
    featureK = argv[4]
    tru = argv[5]
    tri = argv[6]
    teu = argv[7]
    thetaPath = argv[9]
    filePrefix = argv[10]

    scale = 1 if dataset in ('delicious', 'fm') else 5
    topN = [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

    # Multi-part messages are space-joined str() values: the same text a
    # comma-separated print statement emits.
    print(' '.join(map(str, ('feature:', featureK))))

    print('load train data...')
    trainUser, trainData = cu.loadData(tru, 'user')
    trainItem, itemSideData = cu.loadData(tri, 'item')
    userNum = len(trainUser.keys())
    itemNum = len(trainItem.keys())
    # The second value returned for the item file is never used; release it.
    del itemSideData

    print('load test data...')
    testUser, testData = cu.loadData(teu, 'user')

    summary = ('User No.', userNum, 'Item No.', itemNum,
               'ratings in Train:', len(trainData),
               'rating in Test:', len(testData))
    print(' '.join(map(str, summary)))

    # Dataset means noted alongside this setting: video 4.31725404304,
    # google play 4.20794266709, auto 4.18519626023. The value in effect
    # is 0.0.
    mean = 0.0

    def runMatrixFactorization(topicMatrix):
        # Shared mf.train invocation for the MF and CTR paths; they differ
        # only in the theta argument.
        mf.train(userNum, itemNum, featureK, trainData, testData, epochs,
                 Lambdau, Lambdav, alpha, mean, filePrefix + str(featureK),
                 save, topicMatrix, scale)

    if MF:
        print('start matrix factorization')
        runMatrixFactorization([])

    if CTR:
        print(' '.join(map(str, ('load theta from...', thetaPath))))
        ctrTheta = cu.loadTheta(thetaPath)
        print('start CTR')
        runMatrixFactorization(ctrTheta)

    if not (RAPARE or RAPMF):
        return

    if RAPARE:
        print('load theta...')
        theta = cu.loadTheta(thetaPath)
    else:
        theta = []

    # No initial factors or biases are supplied to the trainer.
    initialUsersVector = None
    initialItemsVector = None
    initialUsersBias = None
    initialItemsBias = None

    userRatingCounts = utils.countRatings(trainUser)
    itemRatingCounts = utils.countRatings(trainItem)
    outputPrefix = filePrefix + str(featureK)

    ra.train(mean, userNum, itemNum, featureK, trainData, testData, epochs,
             alpha, Lambdau, dataset, userRatingCounts, itemRatingCounts,
             True, initialUsersVector, initialItemsVector, initialUsersBias,
             initialItemsBias, Lambdauv, theta, outputPrefix, save, scale,
             Lambdav, hasLDA, tru, tri, teu, topN)