Exemple #1
0
        sys.exit("Argument missing - lambda_v is required")

    # Echo the effective run configuration so it is captured in the run output.
    print(
        "===================================MF Option Setting==================================="
    )
    print("\tbinarizing ratings - %s" % binary_rating)
    print("\tdata path - %s" % data_path)
    print("\tresult path - %s" % res_dir)
    print("\tpretrained w2v data path - %s" % pretrain_w2v)
    print ("\tdimension: %d\n\tlambda_u: %.4f\n\tlambda_v: %.4f\n\tmax_iter: %d\n\tnum_kernel_per_ws: %d" \
        % (dimension, lambda_u, lambda_v, max_iter, num_kernel_per_ws))
    print(
        "==========================================================================================="
    )
    # Load the rating data R and the auxiliary data D_all from aux_path.
    # R is used below through R.shape, so it is expected to be 2-D.
    R, D_all = data_factory.load(aux_path, binary_rating)
    # Read the four rating splits stored under data_path; binary_rating is
    # forwarded to every read so all splits are parsed the same way.
    train_user = data_factory.read_rating(data_path + '/train_user.dat',
                                          binary_rating)
    train_item = data_factory.read_rating(data_path + '/train_item.dat',
                                          binary_rating)
    valid_user = data_factory.read_rating(data_path + '/valid_user.dat',
                                          binary_rating)
    test_user = data_factory.read_rating(data_path + '/test_user.dat',
                                         binary_rating)
    # For each user, build a query containing the user id, the ground truth of
    # the top_n items, and the pre-selected items.
    print("Making query for each user...")
    query_list = []
    # Every column index of R (presumably one per item -- confirm against
    # data_factory.load).
    all_item_set = set(range(R.shape[1]))
    for i in range(R.shape[0]):
        q = Query(i)
        q.extendGroundTruth(valid_user[0][i])
        q.extendGroundTruth(test_user[0][i])
        if len(q.ground_truth) != 0:
Exemple #2
0
    # Report the configuration this ConvMF run was started with.
    print("===================================ConvMF Option Setting===================================")
    for option_line in (
            "\taux path - %s" % aux_path,
            "\tdata path - %s" % data_path,
            "\tresult path - %s" % res_dir,
            "\tpretrained w2v data path - %s" % pretrain_w2v,
            "\tdimension: %d\n\tlambda_u: %.4f\n\tlambda_v: %.4f\n\tmax_iter: %d\n\tnum_kernel_per_ws: %d"
            % (dimension, lambda_u, lambda_v, max_iter, num_kernel_per_ws),
    ):
        print(option_line)
    print("===========================================================================================")

    # Rating data plus the auxiliary document data (sequences and vocabulary).
    R, D_all = data_factory.load(aux_path)
    CNN_X = D_all['X_sequence']
    # One more than the number of vocabulary entries (presumably to reserve
    # an extra index -- confirm against the model's embedding layer).
    vocab_size = 1 + len(D_all['X_vocab'])

    from models import ConvMF

    # Use pretrained word vectors as the initial embedding only when a path
    # to them was supplied.
    init_W = (
        None
        if pretrain_w2v is None
        else data_factory.read_pretrained_word2vec(
            pretrain_w2v, D_all['X_vocab'], emb_dim)
    )

    # Read the four rating splits in the same order as before.
    train_user, train_item, valid_user, test_user = [
        data_factory.read_rating(data_path + split_name)
        for split_name in ('/train_user.dat', '/train_item.dat',
                           '/valid_user.dat', '/test_user.dat')
    ]

    # Train/evaluate the model; every option is passed by keyword.
    ConvMF(
        max_iter=max_iter,
        res_dir=res_dir,
        lambda_u=lambda_u,
        lambda_v=lambda_v,
        dimension=dimension,
        vocab_size=vocab_size,
        init_W=init_W,
        give_item_weight=give_item_weight,
        CNN_X=CNN_X,
        emb_dim=emb_dim,
        num_kernel_per_ws=num_kernel_per_ws,
        train_user=train_user,
        train_item=train_item,
        valid_user=valid_user,
        test_user=test_user,
        R=R,
    )
Exemple #3
0
from data_manager import Data_Factory
from rating_models import PMF
import pickle
import sys
import os

# Driver script: run PMF on a pre-split dataset.
#
# Usage: pass the dataset path as the single command-line argument. The
# dataset name is taken from the third '/'-separated component of that path
# (extension stripped), e.g. "./data/NAME.ext" -> "NAME". Inputs are read from
# "./test/NAME/" and results are written to "./outputs/NAME/".
data_factory = Data_Factory()

# Fail with a readable message instead of a bare IndexError when the
# argument is missing.
if len(sys.argv) < 2:
    sys.exit("Argument missing - dataset path is required")

data = sys.argv[1]
# NOTE: assumes the path has at least three '/'-separated components.
file = data.split('/')[2].split('.')[0]
test_file = "./test/" + file + "/"
output_file = "./outputs/" + file + "/"
if not os.path.exists(output_file):
    os.makedirs(output_file)

binary_rating = False

# Close the ratings file deterministically instead of leaking the handle.
# NOTE(security): pickle can execute arbitrary code while loading; only point
# this script at ratings files you trust.
with open(test_file + 'ratings.all', 'rb') as ratings_fh:
    R = pickle.load(ratings_fh)

# All four splits are read with the same binary_rating setting.
train_user = data_factory.read_rating(test_file + 'train_user.dat', binary_rating)
train_item = data_factory.read_rating(test_file + 'train_item.dat', binary_rating)
valid_user = data_factory.read_rating(test_file + 'valid_user.dat', binary_rating)
test_user = data_factory.read_rating(test_file + 'test_user.dat', binary_rating)

# PMF
PMF(res_dir=output_file, lambda_u=0.1, lambda_v=0.1, train_user=train_user,
    train_item=train_item, valid_user=valid_user, test_user=test_user, R=R)
Exemple #4
0
'''
import numpy as np


def eval_MAE(R, U, V, TS):
    """Return the mean absolute error of the predictions ``U[i] . V[j]``.

    Args:
        R: Per-user sequence; ``R[i]`` holds the observed ratings of user
            ``i`` and is subtracted element-wise from the predictions, so it
            must line up with ``TS[i]``.
        U: 2-D array with one row per user (``U.shape[0]`` users).
        V: 2-D array indexed by item id along its first axis.
        TS: Per-user sequence; ``TS[i]`` is the collection of item indices to
            evaluate for user ``i`` (may be empty).

    Returns:
        The summed absolute error over all evaluated (user, item) pairs
        divided by the number of such pairs. If no user has any item the
        division is by zero (NumPy yields ``nan`` with a warning).
    """
    num_user = U.shape[0]
    sub_mae = np.zeros(num_user)
    TS_count = 0
    # range() works on both Python 2 and Python 3; xrange() is Python 2 only.
    for i in range(num_user):
        idx_item = TS[i]
        # Explicit length test: idx_item may be a NumPy array, whose truth
        # value is ambiguous.
        if len(idx_item) == 0:
            continue
        TS_count += len(idx_item)
        # Predicted ratings of user i for exactly the items in idx_item.
        approx_R_i = U[i].dot(V[idx_item].T)
        sub_mae[i] = np.abs(approx_R_i - R[i]).sum()
    return sub_mae.sum() / TS_count


from data_manager import Data_Factory
#import numpy as np
# Evaluate previously saved latent factors against the held-out test ratings.
data_factory = Data_Factory()
data_path = '/home/daicoolb/CopeData/test_0723_0.8_100k'
#R,D_all,S=data_factory.load(data_path)
#train_user=data_factory.read_rating(data_path+'/train_user.dat')
test_user = data_factory.read_rating(data_path + '/test_user.dat')
# Factor matrices stored as plain text under <data_path>/Result.
U = np.loadtxt(data_path + '/Result/U.dat')
V = np.loadtxt(data_path + '/Result/V.dat')
# test_user[1] is passed as the ratings (R) and test_user[0] as the per-user
# item indices (TS). The parenthesised single-argument print runs on both
# Python 2 and Python 3.
print("MAE: %.5f \n " % eval_MAE(test_user[1], U, V, test_user[0]))
Exemple #5
0
    # Echo the effective run configuration before training starts.
    # Parenthesised single-argument print calls behave identically on
    # Python 2 and Python 3, unlike the bare print statement.
    print("===================================ConvMF Option Setting===================================")
    print("\taux path - %s" % aux_path)
    print("\tdata path - %s" % data_path)
    print("\tresult path - %s" % res_dir)
    print("\tpretrained w2v data path - %s" % pretrain_w2v)
    print("\tdimension: %d\n\tlambda_u: %.4f\n\tlambda_v: %.4f\n\tmax_iter: %d\n\tnum_kernel_per_ws: %d"
          % (dimension, lambda_u, lambda_v, max_iter, num_kernel_per_ws))
    print("===========================================================================================")

    # Rating data plus the auxiliary document data (sequences and vocabulary).
    R, D_all = data_factory.load(aux_path)
    CNN_X = D_all['X_sequence']
    # One more than the number of vocabulary entries (presumably to reserve
    # an extra index -- confirm against the model's embedding layer).
    vocab_size = len(D_all['X_vocab']) + 1

    from models import ConvMF

    # Use pretrained word vectors as the initial embedding only when a path
    # to them was supplied.
    if pretrain_w2v is None:
        init_W = None
    else:
        init_W = data_factory.read_pretrained_word2vec(
            pretrain_w2v, D_all['X_vocab'], emb_dim)

    train_user = data_factory.read_rating(data_path + '/train_user.dat')
    train_item = data_factory.read_rating(data_path + '/train_item.dat')
    valid_user = data_factory.read_rating(data_path + '/valid_user.dat')
    test_user = data_factory.read_rating(data_path + '/test_user.dat')

    # Train/evaluate the model; every option is passed by keyword.
    ConvMF(max_iter=max_iter, res_dir=res_dir,
           lambda_u=lambda_u, lambda_v=lambda_v, dimension=dimension,
           vocab_size=vocab_size, init_W=init_W,
           give_item_weight=give_item_weight, CNN_X=CNN_X, emb_dim=emb_dim,
           num_kernel_per_ws=num_kernel_per_ws,
           train_user=train_user, train_item=train_item,
           valid_user=valid_user, test_user=test_user, R=R)
Exemple #6
0
    # Echo the content-related options. Every print uses the parenthesised
    # single-argument form, which behaves identically on Python 2 and
    # Python 3 (the bare print statement is a SyntaxError on Python 3).
    print("\tContent: %s" % (print_helper(content_mode)))

    # CNN-specific settings are shown only when 'cnn' is in content_mode.
    if 'cnn' in content_mode:
        print("\tnum_kernel_per_ws: %d\n\tpretrained w2v data path - %s" % (
            num_kernel_per_ws, pretrain_w2v))
    print('\tItem weight %s ' %
          ('Constant (a=1,b=0,01)'
           if not give_item_weight else 'Constant (a=1,b=0,01). And f(n)'))
    # How the CNN and CAE outputs are joined is shown only when 'cnn_cae'
    # is in content_mode.
    if 'cnn_cae' in content_mode:
        print('\tJoin CNN and CAE outputs method: %s' % (
            'Transfer block' if use_transfer_block else 'Concatenation'))
    print("===========================================================================================")

    for f in range(1, fold_num + 1):
        train_user = data_factory.read_rating(
            os.path.join(data_path, 'fold-{}'.format(f),
                         'train-fold_{}-users.dat'.format(f)))
        train_item = data_factory.read_rating(
            os.path.join(data_path, 'fold-{}'.format(f),
                         'train-fold_{}-items.dat'.format(f)))
        # in case of training only on training and test sets
        if os.path.exists(
                os.path.join(data_path, 'fold-{}'.format(f),
                             'validation-fold_{}-users.dat'.format(f))):
            valid_user = data_factory.read_rating(
                os.path.join(data_path, 'fold-{}'.format(f),
                             'validation-fold_{}-users.dat'.format(f)))
        else:
            valid_user = None
        test_user = data_factory.read_rating(
            os.path.join(data_path, 'fold-{}'.format(f),