# NOTE(review): whitespace-mangled fragment of an MF run script, collapsed onto
# one physical line. It contains (in order): an argument-presence check, the
# option banner, rating-data loading with a `binary_rating` flag, and the start
# of a per-user `Query` construction loop. The fragment is truncated — it begins
# at a mid-script `sys.exit` and ends on a bare `if len(q.ground_truth) != 0:`
# with no body — so the code is kept byte-identical rather than reconstructed.
sys.exit("Argument missing - lambda_v is required") print( "===================================MF Option Setting===================================" ) print("\tbinarizing ratings - %s" % binary_rating) print("\tdata path - %s" % data_path) print("\tresult path - %s" % res_dir) print("\tpretrained w2v data path - %s" % pretrain_w2v) print ("\tdimension: %d\n\tlambda_u: %.4f\n\tlambda_v: %.4f\n\tmax_iter: %d\n\tnum_kernel_per_ws: %d" \ % (dimension, lambda_u, lambda_v, max_iter, num_kernel_per_ws)) print( "===========================================================================================" ) R, D_all = data_factory.load(aux_path, binary_rating) train_user = data_factory.read_rating(data_path + '/train_user.dat', binary_rating) train_item = data_factory.read_rating(data_path + '/train_item.dat', binary_rating) valid_user = data_factory.read_rating(data_path + '/valid_user.dat', binary_rating) test_user = data_factory.read_rating(data_path + '/test_user.dat', binary_rating) # for each user, build a query contains user id, ground truth of top_n items, and pre-selected items print("Making query for each user...") query_list = [] all_item_set = set(range(R.shape[1])) for i in range(R.shape[0]): q = Query(i) q.extendGroundTruth(valid_user[0][i]) q.extendGroundTruth(test_user[0][i]) if len(q.ground_truth) != 0:
# ---- ConvMF driver: report the chosen hyper-parameters, load the ----
# ---- preprocessed corpus and ratings, then kick off training.     ----

print("===================================ConvMF Option Setting===================================")
print("\taux path - %s" % aux_path)
print("\tdata path - %s" % data_path)
print("\tresult path - %s" % res_dir)
print("\tpretrained w2v data path - %s" % pretrain_w2v)
print("\tdimension: %d\n\tlambda_u: %.4f\n\tlambda_v: %.4f\n\tmax_iter: %d\n\tnum_kernel_per_ws: %d"
      % (dimension, lambda_u, lambda_v, max_iter, num_kernel_per_ws))
print("===========================================================================================")

# Rating matrix plus the item-text bundle (token sequences + vocabulary).
R, D_all = data_factory.load(aux_path)
item_sequences = D_all['X_sequence']
# +1 over the vocab length — presumably reserves index 0 for padding; verify
# against the embedding layer in models.ConvMF.
n_vocab = len(D_all['X_vocab']) + 1

# Deferred import: the (heavy) model stack is only pulled in once data loads.
from models import ConvMF

# Optional warm start of the word-embedding layer from pretrained word2vec.
word2vec_init = (None if pretrain_w2v is None
                 else data_factory.read_pretrained_word2vec(
                     pretrain_w2v, D_all['X_vocab'], emb_dim))

train_user = data_factory.read_rating(data_path + '/train_user.dat')
train_item = data_factory.read_rating(data_path + '/train_item.dat')
valid_user = data_factory.read_rating(data_path + '/valid_user.dat')
test_user = data_factory.read_rating(data_path + '/test_user.dat')

ConvMF(max_iter=max_iter, res_dir=res_dir,
       lambda_u=lambda_u, lambda_v=lambda_v, dimension=dimension,
       vocab_size=n_vocab, init_W=word2vec_init,
       give_item_weight=give_item_weight,
       CNN_X=item_sequences, emb_dim=emb_dim,
       num_kernel_per_ws=num_kernel_per_ws,
       train_user=train_user, train_item=train_item,
       valid_user=valid_user, test_user=test_user, R=R)
"""Run PMF on a preprocessed split and write results under ./outputs/<name>/."""
from data_manager import Data_Factory
from rating_models import PMF
import pickle
import sys
import os

data_factory = Data_Factory()

# Positional argument: path to the raw data file, e.g. "./data/<name>.ext".
data = sys.argv[1]
# Renamed from `file` to avoid shadowing the builtin.
# NOTE(review): assumes a "./dir/name.ext"-shaped path (exactly two slashes).
dataset_name = data.split('/')[2].split('.')[0]
test_file = "./test/" + dataset_name + "/"
output_file = "./outputs/" + dataset_name + "/"
if not os.path.exists(output_file):
    os.makedirs(output_file)

binary_rating = False

# Context manager closes the ratings file promptly — the original left the
# open() handle from pickle.load(open(...)) unclosed.
with open(test_file + 'ratings.all', 'rb') as ratings_f:
    R = pickle.load(ratings_f)

train_user = data_factory.read_rating(test_file + 'train_user.dat', binary_rating)
train_item = data_factory.read_rating(test_file + 'train_item.dat', binary_rating)
valid_user = data_factory.read_rating(test_file + 'valid_user.dat', binary_rating)
test_user = data_factory.read_rating(test_file + 'test_user.dat', binary_rating)

# ---- PMF ----
PMF(res_dir=output_file, lambda_u=0.1, lambda_v=0.1,
    train_user=train_user, train_item=train_item,
    valid_user=valid_user, test_user=test_user, R=R)
''' import numpy as np def eval_MAE(R, U, V, TS): num_user = U.shape[0] sub_mae = np.zeros(num_user) TS_count = 0 for i in xrange(num_user): idx_item = TS[i] if len(idx_item) == 0: continue TS_count = TS_count + len(idx_item) approx_R_i = U[i].dot(V[idx_item].T) # approx_R[i, idx_item] R_i = R[i] sub_mae[i] = np.abs(approx_R_i - R_i).sum() mae = sub_mae.sum() / TS_count return mae from data_manager import Data_Factory #import numpy as np data_factory = Data_Factory() data_path = '/home/daicoolb/CopeData/test_0723_0.8_100k' #R,D_all,S=data_factory.load(data_path) #train_user=data_factory.read_rating(data_path+'/train_user.dat') test_user = data_factory.read_rating(data_path + '/test_user.dat') U = np.loadtxt(data_path + '/Result/U.dat') V = np.loadtxt(data_path + '/Result/V.dat') print "MAE: %.5f \n " % eval_MAE(test_user[1], U, V, test_user[0])
# ---- ConvMF driver: print hyper-parameter settings, load data, train. ----
# Python-2-only print statements converted to print() calls: the single-
# argument form produces identical output under Python 2 and Python 3, and
# matches the sibling (Python 3) scripts in this codebase.

print("===================================ConvMF Option Setting===================================")
print("\taux path - %s" % aux_path)
print("\tdata path - %s" % data_path)
print("\tresult path - %s" % res_dir)
print("\tpretrained w2v data path - %s" % pretrain_w2v)
print("\tdimension: %d\n\tlambda_u: %.4f\n\tlambda_v: %.4f\n\tmax_iter: %d\n\tnum_kernel_per_ws: %d"
      % (dimension, lambda_u, lambda_v, max_iter, num_kernel_per_ws))
print("===========================================================================================")

# Rating matrix plus the item-text bundle (token sequences + vocabulary).
R, D_all = data_factory.load(aux_path)
CNN_X = D_all['X_sequence']
# +1 over the vocab length — presumably reserves index 0 for padding; verify
# against the embedding layer in models.ConvMF.
vocab_size = len(D_all['X_vocab']) + 1

# Deferred import: the heavy model stack is only pulled in once data loads.
from models import ConvMF

# Optional warm start of the word-embedding layer from pretrained word2vec.
if pretrain_w2v is None:
    init_W = None
else:
    init_W = data_factory.read_pretrained_word2vec(
        pretrain_w2v, D_all['X_vocab'], emb_dim)

train_user = data_factory.read_rating(data_path + '/train_user.dat')
train_item = data_factory.read_rating(data_path + '/train_item.dat')
valid_user = data_factory.read_rating(data_path + '/valid_user.dat')
test_user = data_factory.read_rating(data_path + '/test_user.dat')

ConvMF(max_iter=max_iter, res_dir=res_dir,
       lambda_u=lambda_u, lambda_v=lambda_v, dimension=dimension,
       vocab_size=vocab_size, init_W=init_W,
       give_item_weight=give_item_weight,
       CNN_X=CNN_X, emb_dim=emb_dim,
       num_kernel_per_ws=num_kernel_per_ws,
       train_user=train_user, train_item=train_item,
       valid_user=valid_user, test_user=test_user, R=R)
# NOTE(review): whitespace-mangled fragment of a k-fold ConvMF/CAE driver,
# collapsed onto one physical line. It mixes Python-2 print statements with
# print() calls, prints the content-mode banner, then starts a loop over folds
# reading per-fold train/validation/test rating files (validation is optional:
# valid_user is set to None when the fold has no validation file). The fragment
# is truncated mid-`os.path.join(...)` call at the end, so the code is kept
# byte-identical rather than reconstructed.
print "\tContent: %s" % (print_helper(content_mode)) if 'cnn' in content_mode: print "\tnum_kernel_per_ws: %d\n\tpretrained w2v data path - %s" % ( num_kernel_per_ws, pretrain_w2v) print('\tItem weight %s ' % ('Constant (a=1,b=0,01)' if not give_item_weight else 'Constant (a=1,b=0,01). And f(n)')) if 'cnn_cae' in content_mode: print '\tJoin CNN and CAE outputs method: %s' % ( 'Transfer block' if use_transfer_block else 'Concatenation') print "===========================================================================================" for f in range(1, fold_num + 1): train_user = data_factory.read_rating( os.path.join(data_path, 'fold-{}'.format(f), 'train-fold_{}-users.dat'.format(f))) train_item = data_factory.read_rating( os.path.join(data_path, 'fold-{}'.format(f), 'train-fold_{}-items.dat'.format(f))) # in case of training only on training and test sets if os.path.exists( os.path.join(data_path, 'fold-{}'.format(f), 'validation-fold_{}-users.dat'.format(f))): valid_user = data_factory.read_rating( os.path.join(data_path, 'fold-{}'.format(f), 'validation-fold_{}-users.dat'.format(f))) else: valid_user = None test_user = data_factory.read_rating( os.path.join(data_path, 'fold-{}'.format(f),