Example #1
0
File: cal_rec.py Project: pjpan/R
def cal_rec(p,cut):
    """Rank all items for every row of U and write the top-`cut` lists to disk.

    Loads ``final-U.dat`` and ``final-V.dat`` from the directory
    ``'cdl' + str(p)``, computes the score matrix as the matrix product
    ``U * V.T`` and writes one line per row to ``rec-list.dat`` in the same
    directory.  Each line has the form ``<hits>:<item> <item> ...`` where the
    items are column indices sorted by descending score and ``<hits>`` is the
    number of those items whose entry in ``read_user('cf-test-1-users.dat')``
    is positive for that row.

    Args:
        p: run identifier; selects the ``cdl<p>`` directory.
        cut: number of top-scoring items kept per row (used as a slice bound).
    """
    R_true = read_user('cf-test-1-users.dat')
    dir_save = 'cdl' + str(p)
    # np.asmatrix is the non-deprecated spelling of np.mat; '*' on matrices
    # is a matrix product.
    U = np.asmatrix(np.loadtxt(dir_save + '/final-U.dat'))
    V = np.asmatrix(np.loadtxt(dir_save + '/final-V.dat'))
    R = U * V.T
    num_u = R.shape[0]
    # The context manager closes the output file even if an iteration raises.
    with open(dir_save + '/rec-list.dat', 'w') as fp:
        for i in range(num_u):
            if i != 0 and i % 100 == 0:
                # Progress message; a single parenthesised string prints the
                # same text under both Python 2 and Python 3.
                print('Iter ' + str(i))
            l_score = R[i, :].A1.tolist()
            # Stable sort: items with equal scores keep ascending index order.
            pl = sorted(enumerate(l_score), key=lambda d: d[1], reverse=True)
            # Slice before unpacking so an empty row gives an empty list
            # instead of an IndexError, and no subscripting of zip() is needed.
            l_rec = [item for item, _ in pl[:cut]]
            s_rec = set(l_rec)
            # np.asarray(...).ravel() flattens the column indices whether
            # np.where returns a matrix or a plain ndarray.
            s_true = set(np.asarray(np.where(R_true[i, :] > 0)[1]).ravel())
            cnt_hit = len(s_rec.intersection(s_true))
            fp.write('%d:' % cnt_hit)
            fp.write(' '.join(map(str, l_rec)))
            fp.write('\n')
Example #2
0

        
Example #3
0
        cnt_hit = len(s_rec.intersection(s_true))
        fp.write('%d:' % cnt_hit)
        fp.write(' '.join(map(str, l_rec)))
        fp.write('\n')
    fp.close()


if __name__ == '__main__':

    # give the same p as given in cdl.py
    p = 1
    M_low = 50
    M_high = 300
    cal_rec(p, M_high)
    dir_save = 'cdl%d' % p
    R_test = read_user('cf-test-1-users.dat')
    fp = open('rec-list.dat')
    lines = fp.readlines()

    total = 0
    correct = 0
    users = 0
    total_items_liked = 0
    num_users = len(range(R_test.shape[0]))

    # recall@M is calculated for M = 50 to 300

    recall_levels = M_high-M_low + 1
    recallArray = np.zeros(shape=(num_users,recall_levels))

    for user_id in range(num_users):
Example #4
0
        cnt_hit = len(s_rec.intersection(s_true))
        fp.write('%d:' % cnt_hit)
        fp.write(' '.join(map(str, l_rec)))
        fp.write('\n')
    fp.close()


if __name__ == '__main__':

    # give the same p as given in cdl.py
    p = 1
    M_low = 50
    M_high = 300
    cal_rec(p, M_high)
    dir_save = 'cdl%d' % p
    R_test = read_user('data/cf-test-1-users.dat')
    fp = open(dir_save + '/rec-list.dat')
    lines = fp.readlines()

    total = 0
    correct = 0
    users = 0
    total_items_liked = 0
    num_users = len(range(R_test.shape[0]))

    # recall@M is calculated for M = 50 to 300

    recall_levels = M_high - M_low + 1
    recallArray = np.zeros(shape=(num_users, recall_levels))

    for user_id in range(num_users):
Example #5
0
    lv = 1e-2  # lambda_v/lambda_n in CDL
    dir_save = 'cdl%d' % p
    if not os.path.isdir(dir_save):
        os.system('mkdir %s' % dir_save)
    fp = open(dir_save + '/cdl.log', 'w')
    print 'p%d: lambda_v/lambda_u/ratio/K: %f/%f/%f/%d' % (p, lambda_v,
                                                           lambda_u, lv, K)
    fp.write('p%d: lambda_v/lambda_u/ratio/K: %f/%f/%f/%d\n' % \
            (p,lambda_v,lambda_u,lv,K))
    fp.close()
    if is_dummy:
        X = data.get_dummy_mult()
        R = data.read_dummy_user()
    else:
        X = data.get_mult()
        R = data.read_user()
    # set to INFO to see less information during training
    logging.basicConfig(level=logging.DEBUG)
    #ae_model = AutoEncoderModel(mx.gpu(0), [784,500,500,2000,10], pt_dropout=0.2,
    #    internal_act='relu', output_act='relu')
    ae_model = AutoEncoderModel(mx.cpu(2), [X.shape[1], 100, K],
                                pt_dropout=0.2,
                                internal_act='relu',
                                output_act='relu')

    train_X = X

    #ae_model.layerwise_pretrain(train_X, 256, 50000, 'sgd', l_rate=0.1, decay=0.0,
    #                         lr_scheduler=mx.misc.FactorScheduler(20000,0.1))
    #V = np.zeros((train_X.shape[0],10))
    V = np.random.rand(train_X.shape[0], K) / 10
Example #6
0
        s_true = set(np.where(R_true[i, :] > 0)[1])
        cnt_hit = len(s_rec.intersection(s_true))
        fp.write("%d:" % cnt_hit)
        fp.write(" ".join(map(str, l_rec)))
        fp.write("\n")
    fp.close()


if __name__ == "__main__":

    # give the same p as given in cdl.py
    p = 4
    cal_rec(p, 300)
    dir_save = "cdl%d" % p

    R_test = read_user("cf-test-1-users.dat")
    fp = open(dir_save + "/rec-list.dat")
    lines = fp.readlines()

    total = 0
    correct = 0
    users = 0
    total_items_liked = 0
    num_users = len(range(R_test.shape[0]))

    # recall@M is calculated for M = 50 to 300
    M_low = 50
    M_high = 300
    recall_levels = M_high - M_low + 1
    recallArray = np.zeros(shape=(num_users, recall_levels))
Example #7
0
File: cdl.py Project: pjpan/R
    np.random.seed(1234) # set seed
    lv = 1e-2 # lambda_v/lambda_n in CDL
    dir_save = 'cdl%d' % p
    if not os.path.isdir(dir_save):
        os.system('mkdir %s' % dir_save)
    fp = open(dir_save+'/cdl.log','w')
    print 'p%d: lambda_v/lambda_u/ratio/K: %f/%f/%f/%d' % (p,lambda_v,lambda_u,lv,K)
    fp.write('p%d: lambda_v/lambda_u/ratio/K: %f/%f/%f/%d\n' % \
            (p,lambda_v,lambda_u,lv,K))
    fp.close()
    if is_dummy:
        X = data.get_dummy_mult()
        R = data.read_dummy_user()
    else:
        X = data.get_mult()
        R = data.read_user()
    # set to INFO to see less information during training
    logging.basicConfig(level=logging.DEBUG)
    #ae_model = AutoEncoderModel(mx.gpu(0), [784,500,500,2000,10], pt_dropout=0.2,
    #    internal_act='relu', output_act='relu')

    #mx.cpu() no param needed for cpu.
    ae_model = AutoEncoderModel(mx.cpu(), [X.shape[1],100,K],
        pt_dropout=0.2, internal_act='relu', output_act='relu')

    train_X = X

    #ae_model.layerwise_pretrain(train_X, 256, 50000, 'sgd', l_rate=0.1, decay=0.0,
    #                         lr_scheduler=mx.misc.FactorScheduler(20000,0.1))
    #V = np.zeros((train_X.shape[0],10))
    V = np.random.rand(train_X.shape[0],K)/10
Example #8
0
import csv
from data import read_user
import numpy as np
# Show, for one user, the training articles and the recommended articles,
# starring the recommendations that have a positive entry in the test matrix.
p = 2
user_id = 1
# read predicted results
dir_save = 'cdl%d' % p

# Map zero-based data-row index -> fourth CSV field (printed below as the
# article title).  The first CSV row is skipped as a header.
# The 'rb' mode is kept as-is; the Python 2 csv module asks for binary mode.
d_id_title = dict()
with open('raw-data.csv', 'rb') as csv_file:
    csvReader = csv.reader(csv_file)
    for i, row in enumerate(csvReader):
        if i == 0:
            continue
        d_id_title[i - 1] = row[3]

R_test = read_user('cf-test-1-users.dat')
R_train = read_user('cf-train-1-users.dat')

# Read the recommendation lists and release the file handle right away
# instead of holding it open until the end of the script.
with open(dir_save + '/rec-list.dat') as fp:
    lines = fp.readlines()

# np.asarray(...).ravel() flattens the column indices whether np.where
# returns a matrix or a plain ndarray.
s_test = set(np.asarray(np.where(R_test[user_id, :] > 0)[1]).ravel())
l_train = np.asarray(np.where(R_train[user_id, :] > 0)[1]).ravel().tolist()
# Each line looks like "<hits>:<item> <item> ..."; keep only the item ids.
l_pred = [int(tok) for tok in lines[user_id].strip().split(':')[1].split(' ')]

print('#####  Articles in the Training Sets  #####')
for i in l_train:
    print(d_id_title[i])
print('\n#####  Articles Recommended (Correct Ones Marked by Stars)  #####')
for i in l_pred:
    if i in s_test:
        print('* ' + d_id_title[i])
    else:
        print(d_id_title[i])
Example #9
0
import csv
from data import read_user
import numpy as np
# Print the training articles of one user followed by that user's
# recommendation list, marking with '* ' every recommended article that has a
# positive entry in the test matrix.
p = 2
user_id = 1
# read predicted results
dir_save = 'cdl%d' % p

# Build {zero-based data-row index: fourth CSV field}; row 0 of the CSV is
# treated as a header and skipped.  The file is closed as soon as it is read.
# The 'rb' mode is unchanged; the Python 2 csv module asks for binary mode.
d_id_title = dict()
with open('raw-data.csv','rb') as csv_file:
    csvReader = csv.reader(csv_file)
    for i, row in enumerate(csvReader):
        if i == 0:
            continue
        d_id_title[i - 1] = row[3]

R_test = read_user('cf-test-1-users.dat')
R_train = read_user('cf-train-1-users.dat')

# Only the text of the file is needed below, so the handle is closed here
# rather than at the very end of the script.
with open(dir_save + '/rec-list.dat') as fp:
    lines = fp.readlines()

# Flatten the column indices in a way that works whether np.where returns a
# matrix or a plain ndarray.
test_cols = np.asarray(np.where(R_test[user_id, :] > 0)[1]).ravel()
train_cols = np.asarray(np.where(R_train[user_id, :] > 0)[1]).ravel()
s_test = set(test_cols)
l_train = train_cols.tolist()
# A line has the form "<hits>:<item> <item> ..."; parse the part after ':'.
l_pred = [int(tok) for tok in lines[user_id].strip().split(':')[1].split(' ')]

print('#####  Articles in the Training Sets  #####')
for i in l_train:
    print(d_id_title[i])
print('\n#####  Articles Recommended (Correct Ones Marked by Stars)  #####')
for i in l_pred:
    if i in s_test:
        print('* ' + d_id_title[i])
    else:
        print(d_id_title[i])
Example #10
0
        cnt_hit = len(s_rec.intersection(s_true))
        fp.write('%d:' % cnt_hit)
        fp.write(' '.join(map(str, l_rec)))
        fp.write('\n')
    fp.close()


if __name__ == '__main__':

    # give the same p as given in cdl.py
    p = 1
    M_low = 50
    M_high = 300
    cal_rec(p, M_high)
    dir_save = 'cdl%d' % p
    R_test = read_user('data/test_P1_3.dat')
    fp = open(dir_save + '/rec-list.dat')
    lines = fp.readlines()

    total = 0
    correct = 0
    users = 0
    total_items_liked = 0
    num_users = len(range(R_test.shape[0]))

    # recall@M is calculated for M = 50 to 300

    recall_levels = M_high - M_low + 1
    recallArray = np.zeros(shape=(num_users, recall_levels))

    for user_id in range(num_users):
Example #11
0
def main():
    """Train the CDL model on the dummy split and report RMSE, AUC and recall@M.

    Steps:
      1. Load the item feature matrix and the train/test rating matrices
         (``data/dummy/cf-*-10-users.dat``, requested with ``num_u=50`` and
         ``num_v=1929``).
      2. Turn both matrices into ``[user, item, label]`` float32 sample arrays.
      3. Pretrain and fine-tune ``CollaborativeDeepLearning``.
      4. Print the test RMSE, the full AUC and recall at several cut-offs, and
         plot recall against the cut-off.
    """
    logging.info('reading data')

    item_mat = data.get_mult()

    trainM = sparse.csr_matrix(
        data.read_user(f_in='data/dummy/cf-train-10-users.dat',
                       num_u=50,
                       num_v=1929))
    testM = sparse.csr_matrix(
        data.read_user(f_in='data/dummy/cf-test-10-users.dat',
                       num_u=50,
                       num_v=1929))

    # Training samples: every entry equal to 1 is a positive; for each user
    # only the first 20 other entries (in column order) are kept as negatives.
    trainList = list()
    for user in range(trainM.shape[0]):
        negative = 0
        for item in range(trainM.shape[1]):
            if trainM[user, item] == 1:
                trainList.append([user, item, 1])
            elif negative < 20:
                trainList.append([user, item, 0])
                negative += 1
    # Convert once, after all users are collected, rather than rebuilding the
    # whole array on every user iteration.
    train = np.array(trainList).astype('float32')

    # Test samples: positives only.
    testList = [[user, item, 1]
                for user in range(testM.shape[0])
                for item in range(testM.shape[1])
                if testM[user, item] == 1]
    test = np.array(testList).astype('float32')

    num_item_feat = item_mat.shape[1]

    model = CollaborativeDeepLearning(item_mat, [num_item_feat, 50, 10])
    model.pretrain(lamda_w=0.001, encoder_noise=0.3, epochs=10)
    model_history = model.fineture(train,
                                   test,
                                   lamda_u=0.01,
                                   lamda_v=0.1,
                                   lamda_n=0.1,
                                   lr=0.01,
                                   epochs=500)
    testing_rmse = model.getRMSE(test)
    print('Testing RMSE = {}'.format(testing_rmse))

    import metrics
    print('AUC %s' % metrics.full_auc(model.cdl_model, testM))

    import matplotlib.pyplot as plt
    # One list of cut-offs drives the array size, the evaluation loop and the
    # x-axis of the plot, so they cannot drift apart.
    recall_cutoffs = [50, 100, 150, 200, 250, 300]
    recallArray = np.zeros(len(recall_cutoffs))
    for x, n in enumerate(recall_cutoffs):
        test_recall = metrics.recall_at_k(model.cdl_model, testM, k=n)
        recallArray[x] = test_recall
        print('Recall: %.2f.' % (test_recall))
    plt.plot(recall_cutoffs, recallArray)
    plt.ylabel("Recall")
    plt.xlabel("M")
    plt.title("Proposed: Recall@M")
    plt.show()