Ejemplo n.º 1
0
def recallWithParams(filePrefix, rootPath, tru, tri, teu, topN, featureK,
                     model):
    """Load saved factor matrices and data splits, then call ``recallAll``.

    The user and item matrices are read with ``utils.loadNumpyMatrix`` from
    ``filePrefix + 'usersVector.npy'`` and ``filePrefix + 'itemsVector.npy'``.
    The splits are read with ``cu.loadData``.  Both bias arguments are passed
    to ``recallAll`` as ``None``.  Nothing is returned.

    NOTE(review): ``hasLDA`` is neither a parameter nor a local of this
    function, so it must be defined at module level -- confirm.

    Args:
        filePrefix: Path prefix of the saved ``.npy`` files.
        rootPath: Prefix to which ``'final.gamma' + str(featureK)`` is
            appended to build the theta path.
        tru: Path passed to ``cu.loadData(..., 'user')`` (training side).
        tri: Path passed to ``cu.loadData(..., 'item')`` (training side).
        teu: Path passed to ``cu.loadData(..., 'user')`` (test side).
        topN: Forwarded unchanged to ``recallAll``.
        featureK: Used in the theta file name and forwarded to ``recallAll``.
        model: Forwarded unchanged to ``recallAll``.
    """
    # File locations.  The two bias paths are built but never loaded: the
    # biases handed to recallAll below are always None.
    userFactorPath = filePrefix + 'usersVector.npy'
    userBiasPath = filePrefix + 'usersBias.npy'
    itemFactorPath = filePrefix + 'itemsVector.npy'
    itemBiasPath = filePrefix + 'itemsBias.npy'
    gammaPath = rootPath + 'final.gamma' + str(featureK)

    usersVector = utils.loadNumpyMatrix(userFactorPath)
    itemsVector = utils.loadNumpyMatrix(itemFactorPath)

    trainUser, trainData = cu.loadData(tru, 'user')
    trainItem, discarded = cu.loadData(tri, 'item')

    # theta is only read from disk when the (external) hasLDA flag is set.
    theta = cu.loadTheta(gammaPath) if hasLDA else []

    userNum = len(trainUser.keys())  # computed but not used below
    itemNum = len(trainItem.keys())
    # Drop the reference to the second element of the item-side load.
    discarded = None

    testUser, testData = cu.loadData(teu, 'user')
    discarded = None

    recallAll(trainUser, trainItem, testUser, itemNum, topN,
              usersVector, itemsVector,
              None, None,  # usersBias, itemsBias
              featureK, model, theta)
Ejemplo n.º 2
0
def solve(argv, alpha, Lambdau, Lambdav, Lambdauv, epochs):
    print argv
    MF = True
    CTR = False
    RAPMF = True
    RAPARE = False
    hasLDA = False
    save = True
    dataset = argv[0]
    if dataset == 'delicious' or dataset == 'fm' or dataset == 'blog':
        scale = 1
    else:
        scale = 5
    kernel = argv[1]
    initFunction = argv[2]
    featureK = argv[3]
    tru = argv[4]
    tri = argv[5]
    teu = argv[6]
    tei = argv[7]
    topN = [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
    thetaPath = argv[8]
    filePrefix = argv[9]

    print 'load train data...'
    trainUser, trainData = cu.loadData(tru, 'user')
    trainItem, noUse = cu.loadData(tri, 'item')
    userNum = len(trainUser.keys())
    itemNum = len(trainItem.keys())
    noUse = None

    print 'load test data...'
    testUser, testData = cu.loadData(teu, 'user')
    noUse = None

    print 'User No.', userNum, 'Item No.', itemNum, 'ratings in Train:', len(
        trainData), 'rating in Test:', len(testData)

    mean = 0.0
    theta = []
    if RAPARE or RAPMF:
        if RAPARE:
            print 'load theta...'
            theta = cu.loadTheta(thetaPath)
        else:
            theta = []
        usersVector = None
        itemsVector = None
        usersBias = None
        itemsBias = None
        usersRatingCountedSet = utils.countRatings(trainUser)
        itemsRatingCountedSet = utils.countRatings(trainItem)
        ra.train(mean, userNum, itemNum, featureK, trainData, testData, epochs,
                 alpha, Lambdau, dataset, usersRatingCountedSet,
                 itemsRatingCountedSet, True, usersVector, itemsVector,
                 usersBias, itemsBias, Lambdauv, theta,
                 filePrefix + str(featureK), save, scale, Lambdav, hasLDA, tru,
                 tri, teu, topN)
Ejemplo n.º 3
0
def recallWithParamsOnLearning(tru, tri, teu, topN, featureK, model,
                               usersVector, itemsVector, theta):
    """Load the data splits and call ``recallAll`` with in-memory factors.

    The factor matrices and ``theta`` are supplied by the caller rather than
    read from disk.  Both bias arguments are passed to ``recallAll`` as
    ``None``.  Nothing is returned.

    Args:
        tru: Path passed to ``cu.loadData(..., 'user')`` (training side).
        tri: Path passed to ``cu.loadData(..., 'item')`` (training side).
        teu: Path passed to ``cu.loadData(..., 'user')`` (test side).
        topN, featureK, model: Forwarded unchanged to ``recallAll``.
        usersVector, itemsVector, theta: Forwarded unchanged to ``recallAll``.
    """
    trainUser, trainData = cu.loadData(tru, 'user')
    trainItem, discarded = cu.loadData(tri, 'item')
    userNum = len(trainUser.keys())  # computed but not used below
    itemNum = len(trainItem.keys())
    # Drop the reference to the second element of the item-side load.
    discarded = None

    testUser, testData = cu.loadData(teu, 'user')
    discarded = None

    recallAll(trainUser, trainItem, testUser, itemNum, topN,
              usersVector, itemsVector,
              None, None,  # usersBias, itemsBias
              featureK, model, theta)
Ejemplo n.º 4
0
        userVector = userVectors[userID - 1]
        itemVector = itemVectors[itemID - 1]
        predictedRating = mf.predictRating(userVector, itemVector, 0, 0, 0, 5)
        error = mf.computeError(rating, predictedRating)
        mae += abs(error)

    print mae / len(testData)


if __name__ == '__main__':
    # Script entry point: load one test split and one pair of saved factor
    # matrices, then pass them to computeMAE.
    #
    # Hard-coded settings. MF, CTR, model and featureK are not read again
    # inside this block.
    MF = True
    CTR = False
    model = False
    featureK = 50
    data = 'google75'
    rootPath = 'ctrdata/' + data + '/'

    # cu.loadData returns a pair; only the second element (testData1) is
    # passed to computeMAE below.
    teu = rootPath + data + '-Test-User.dat'
    testData, testData1 = cu.loadData(teu, 'user')

    # The second assignment overrides the first, so only the 'MF_' prefix is
    # actually used. The other prefixes are alternative runs.
    filePrefix = 'ctrdata/google75/500.05_0.5_0.01/RaP_'
    filePrefix = 'ctrdata/google75/2000.1_0.1/MF_'
    #filePrefix = 'ctrdata/google75/2000.5_0.5_0.01/noLDA_RaP_'
    usersVectorFile = filePrefix + 'usersVector.npy'
    itemsVectorFile = filePrefix + 'itemsVector.npy'
    userVectors = utils.loadNumpyMatrix(usersVectorFile)
    itemVectors = utils.loadNumpyMatrix(itemsVectorFile)
    # Bias loading is disabled; both names are set to None and not passed on.
    usersBias = None  #utils.loadNumpyMatrix(usersBiasFile)
    itemsBias = None  #utils.loadNumpyMatrix(itemsBiasFile)
    computeMAE(testData1, userVectors, itemVectors)
Ejemplo n.º 5
0
def solve(argv, alpha, Lambdau, Lambdav, Lambdauv, epochs):
    print argv
    MF = False
    CTR = False
    RAPMF = False
    RAPARE = True
    hasLDA = False
    save = True
    dataset = argv[0]
    if dataset == 'delicious' or dataset == 'fm':
        scale = 1
    else:
        scale = 5
    newElement = argv[1]
    kernel = argv[2]
    initFunction = argv[3]
    featureK = argv[4]
    tru = argv[5]
    tri = argv[6]
    teu = argv[7]
    tei = argv[8]
    topN = [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
    thetaPath = argv[9]
    filePrefix = argv[10]
    print 'feature:', featureK
    print 'load train data...'
    trainUser, trainData = cu.loadData(tru, 'user')
    trainItem, noUse = cu.loadData(tri, 'item')
    userNum = len(trainUser.keys())
    itemNum = len(trainItem.keys())
    noUse = None

    print 'load test data...'
    testUser, testData = cu.loadData(teu, 'user')
    #testItem, testDataNoUse = cu.loadData(tei, 'item')
    noUse = None

    print 'User No.', userNum, 'Item No.', itemNum, 'ratings in Train:', len(
        trainData), 'rating in Test:', len(testData)

    #mean = 4.31725404304#video
    #mean = 4.20794266709#google play
    mean = 4.18519626023  #auto
    mean = 0.0
    theta = []
    if MF:
        print 'start matrix factorization'
        mf.train(userNum, itemNum, featureK, trainData, testData, epochs,
                 Lambdau, Lambdav, alpha, mean, filePrefix + str(featureK),
                 save, theta, scale)
    if CTR:
        print 'load theta from...', thetaPath
        theta = cu.loadTheta(thetaPath)
        print 'start CTR'
        mf.train(userNum, itemNum, featureK, trainData, testData, epochs,
                 Lambdau, Lambdav, alpha, mean, filePrefix + str(featureK),
                 save, theta, scale)

    if RAPARE or RAPMF:
        if RAPARE:
            print 'load theta...'
            theta = cu.loadTheta(thetaPath)
        else:
            theta = []
        usersVector = None
        itemsVector = None
        usersBias = None
        itemsBias = None
        usersRatingCountedSet = utils.countRatings(trainUser)
        itemsRatingCountedSet = utils.countRatings(trainItem)
        ra.train(mean, userNum, itemNum, featureK, trainData, testData, epochs,
                 alpha, Lambdau, dataset, usersRatingCountedSet,
                 itemsRatingCountedSet, True, usersVector, itemsVector,
                 usersBias, itemsBias, Lambdauv, theta,
                 filePrefix + str(featureK), save, scale, Lambdav, hasLDA, tru,
                 tri, teu, topN)