Code example #1
def main():
    iids = get_all_vids(vid)
    for i in range(fold):
        data, truids, triids = get_train_dicts(trpath % i)
        model = BPR(dimension, truids, triids)
        model.train(data, iteration)
        # Persist the trained model for this fold; the context manager
        # closes the file handle that the original left dangling.
        with open(modelpath % i, 'wb') as f:
            pickle.dump(model, f, pickle.HIGHEST_PROTOCOL)
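For completeness, a minimal sketch of reading one of the pickled models back (assuming the same per-fold modelpath pattern used above):

import pickle

# Reload the model trained on fold 0; modelpath is the same
# per-fold pattern assumed above.
with open(modelpath % 0, 'rb') as f:
    model = pickle.load(f)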
Code example #2
File: bprCythonTest.py  Project: charanpald/bpr
def testLoss(self):
    args = BPRArgsPy()
    #args.bias_regularization = 0
    #args.positive_item_regularization = 0
    #args.negative_item_regularization = 0
    #args.user_regularization = 0
    k = 5

    for i in range(10):
        learner = BPR(k, args)
        sampler = UniformUserUniformItem()
        user_factors, item_factors, loss_samples = learner.init(self.X, sampler)

        ell = learner.loss(loss_samples, user_factors, item_factors)

        # Now compare against the pure-Python implementation
        learner2 = BPRPy(k, args)
        learner2.init(self.X)
        learner2.user_factors = user_factors
        learner2.item_factors = item_factors
        learner2.loss_samples = loss_samples
        ell2 = learner2.loss()

        self.assertEqual(ell, ell2)
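One caveat in this test: assertEqual on two floats only passes when the Cython and pure-Python implementations agree bit-for-bit. If they ever diverge by rounding, the standard unittest tolerance check is the usual substitute:

# Tolerate floating-point rounding between the two implementations.
self.assertAlmostEqual(ell, ell2, places=6)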
Code example #3
def main(args):
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())
    seed = 1111
    set_seed(seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Guard the CUDA-only query so the script also runs on CPU machines.
    if device.type == 'cuda':
        print('device', device, torch.cuda.current_device())
    else:
        print('device', device)

    data_obj = _DATA()
    if "yelp" in args.data_name:
        train_data, valid_data, vocab_obj = data_obj.f_load_data_yelp_restaurant(args)
    elif "movie" in args.data_name:
        train_data, valid_data, vocab_obj = data_obj.f_load_data_movie(args)
    elif "beer" in args.data_name:
        train_data, valid_data, vocab_obj = data_obj.f_load_data_beer(args)
    elif "wine" in args.data_name:
        train_data, valid_data, vocab_obj = data_obj.f_load_data_wine(args)
    elif "lthing" in args.data_name:
        # lthing reuses the movie loader in the original code
        train_data, valid_data, vocab_obj = data_obj.f_load_data_movie(args)
    else:
        raise ValueError("unknown data_name: " + args.data_name)

    if args.train:
        now_time = datetime.now()
        time_name = str(now_time.month) + "_" + str(now_time.day) + "_" + str(
            now_time.hour) + "_" + str(now_time.minute)
        model_file = os.path.join(args.model_path,
                                  args.data_name + "_" + args.model_name)

        if not os.path.isdir(model_file):
            print("create a directory", model_file)
            os.makedirs(model_file)

        args.model_file = model_file + "/model_best_" + time_name + ".pt"
        print("model_file", args.model_file)

    print("vocab_size", vocab_obj.vocab_size)
    print("user num", vocab_obj.user_num)
    print("item num", vocab_obj.item_num)

    pretrain_model_file = args.pretrain_model_file
    pretrain_network = None
    if pretrain_model_file != "":
        pretrain_network = BPR(vocab_obj, args, device)
        pretrain_model_abs_file = os.path.join(args.model_path,
                                               pretrain_model_file)
        print("pretrain_model_abs_file", pretrain_model_abs_file)
        # map_location lets a checkpoint trained on GPU load on a CPU-only machine
        checkpoint = torch.load(pretrain_model_abs_file, map_location=device)
        pretrain_network.load_state_dict(checkpoint['model'])

    network = _ATTR_NETWORK(vocab_obj, args, device)

    total_param_num = 0
    for name, param in network.named_parameters():
        if param.requires_grad:
            param_num = param.numel()
            total_param_num += param_num
            print(name, "\t", param_num)

    print("total parameters num", total_param_num)

    if args.train:
        logger_obj = _LOGGER()
        logger_obj.f_add_writer(args)

        optimizer = _OPTIM(network.parameters(), args)
        trainer = _TRAINER(vocab_obj, args, device)
        trainer.f_train(pretrain_network, train_data, valid_data, network,
                        optimizer, logger_obj)

        logger_obj.f_close_writer()

    if args.eval:
        print("=" * 10, "eval", "=" * 10)

        eval_obj = _EVAL(vocab_obj, args, device)

        network = network.to(device)

        eval_obj.f_init_eval(network, args.model_file, reload_model=True)

        # eval_obj.f_eval_new_user(train_data, valid_data)
        eval_obj.f_eval(train_data, valid_data)

    if args.test:
        print("=" * 10, "test", "=" * 10)

        infer_obj = _INFER(vocab_obj, args, device)

        network = network.to(device)

        infer_obj.f_init_infer(network, args.model_file, reload_model=True)

        infer_obj.f_infer(train_data, valid_data)
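This main pulls everything from an args object. A hypothetical sketch of the argument plumbing it expects (flag names inferred from the attributes used above; the real project likely defines more options):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--data_name", default="yelp")          # selects the dataset loader
parser.add_argument("--model_name", default="attr_net")     # hypothetical default
parser.add_argument("--model_path", default="checkpoints")  # hypothetical default
parser.add_argument("--pretrain_model_file", default="")    # empty string skips BPR pretraining
parser.add_argument("--train", action="store_true")
parser.add_argument("--eval", action="store_true")
parser.add_argument("--test", action="store_true")

if __name__ == "__main__":
    main(parser.parse_args())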
Code example #4
print("test_set shape: ", np.shape(test_set))

model_para = {
    'user_size': user_size,
    'item_size': item_size,
    'embed_dim': 256,
    'layer': args.layer,
    'batch_size': args.batch_size,
    'iterations': 200,
}
print(model_para)

args.device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = BPR(model_para["user_size"], model_para["item_size"],
            model_para["embed_dim"], args.L2, model_para["layer"],
            args.reg_type).to(args.device)

optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

best_mrr_5, best_mrr_20, best_hit_5, best_hit_20, best_ndcg_5, best_ndcg_20 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0


def pad_train_seq(data):
    # Left-pad each sequence with its own first element so that all
    # sequences in the batch share the maximum length.
    max_len = max(len(s) for s in data)
    pad_samples = []
    for idx, s in enumerate(data):
        if len(s) < 1:
            # An empty sequence would crash on s[0] below; flag it.
            print(s, idx)
        pad_samples.append([s[0]] * (max_len - len(s)) + s)
    return np.array(pad_samples)
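A quick illustration of what pad_train_seq returns on ragged input (toy sequences, not from the original project): each sequence is left-padded with its own first element up to the longest length in the batch.

seqs = [[3, 7], [5], [2, 4, 6]]
print(pad_train_seq(seqs))
# [[3 3 7]
#  [5 5 5]
#  [2 4 6]]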
Code example #5
print("valid_set shape: ", np.shape(valid_set))
print("test_set shape: ", np.shape(test_set))

model_para = {
    'user_size': user_size,
    'item_size': item_size,
    'embed_dim': 256,
    'layer': args.layer,
    'batch_size': args.batch_size,
    'iterations': 200,
}
print(model_para)

args.device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = BPR(model_para["user_size"], model_para["item_size"], model_para["embed_dim"], args.L2,
            model_para["layer"], args.reg_type).to(args.device)
if args.fixlr:
    loc = 1
else:
    if args.difflr:
        loc = (1 + np.cos(np.pi * ((args.num - args.i) / args.num))) / 2
    else:
        loc = 1
print('The initial learning rate is:', args.lr * loc)

optimizer = torch.optim.Adam(model.parameters(), lr=args.lr * loc)

if args.cos:
    steps = train_set.shape[0] // model_para['batch_size']
    if train_set.shape[0] % model_para['batch_size'] == 0:
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
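The snippet is cut off in the source mid-call. CosineAnnealingLR needs at least a T_max argument (the number of scheduler steps per cosine cycle); a plausible completion, where the T_max value is an assumption rather than something recovered from the original, would be:

        # Hypothetical completion: one cosine cycle over the whole run
        # (T_max is assumed; the original snippet is truncated here).
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=steps * model_para['iterations'])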
Code example #6
    logfilename = 'logs/%s_%s_%s_%s_%s.log' % (
        args.dataset, args.model, str(args.p_emb), str(
            args.p_ctx), str(args.p_proj))
    logging.basicConfig(filename=logfilename,
                        level=logging.INFO,
                        format='%(asctime)s :: %(levelname)s :: %(message)s')
    logging.info('log info to ' + logfilename)

logging.info(args)
if args.dataset == 'amazon':
    ds = ds_amazon(logging, args)
else:
    raise Exception('no dataset: ' + args.dataset)

if args.model == 'bpr':
    model = BPR(ds, args, logging)
elif args.model == 'cbpr':
    model = CBPR(ds, args, logging)
elif args.model == 'vbpr':
    model = VBPR(ds, args, logging)
elif args.model == 'amr':
    model = AMR(ds, args, logging)
elif args.model == 'mtpr':
    model = MTPR(ds, args, logging)
else:
    raise Exception('unknown model type', args.model)

model.train()

weight_filename = 'weights/%s_%s_%s_%s_%s.npy' % (
    args.dataset, args.model, str(args.p_emb), str(args.p_ctx), str(args.p_proj))
Code example #7
data = coo_matrix((vals, idxs.T)).tocsr()
user_item_counts = dict(
    (i + 1, data[i].getnnz()) for i in range(data.shape[0]))

print('creating samples...')
mapper1 = Mapper1(user_item_counts, oversampling=10)
print('map-red1...')
toydoop.mapreduce(datafile,
                  tmp1,
                  mapper=mapper1,
                  reducer=reducer1,
                  parser=parser)
print('map2...')
toydoop.mapreduce(datafile, tmp2, mapper=mapper2,
                  parser=parser)  # map the data again
print('red2...')
toydoop.mapreduce([tmp1, tmp2],
                  sample_file,
                  reducer=reducer2,
                  formatter=formatter)

print('training...')
args = BPRArgs()
args.learning_rate = 0.3
num_factors = 10
model = BPR(num_factors, args)
sampler = ExternalSchedule(sample_file,
                           index_offset=1)  # schedule is one-indexed
num_iters = 10
model.train(data, sampler, num_iters)
Code example #8
File: presampled_bpr.py  Project: Dellen/bpr
datafile = sys.argv[1]  # one-indexed, whitespace separated
sample_file = datafile + '.samples'
tmp1 = sample_file + '.tmp1'
tmp2 = sample_file + '.tmp2'

print('reading data...')
data = loadtxt(datafile)
print('converting to zero-indexed sparse matrix...')
idxs = data[:, :2] - 1
vals = data[:, 2]
data = coo_matrix((vals, idxs.T)).tocsr()
user_item_counts = dict((i + 1, data[i].getnnz()) for i in range(data.shape[0]))

print('creating samples...')
mapper1 = Mapper1(user_item_counts, oversampling=10)
print('map-red1...')
toydoop.mapreduce(datafile, tmp1, mapper=mapper1, reducer=reducer1, parser=parser)
print('map2...')
toydoop.mapreduce(datafile, tmp2, mapper=mapper2, parser=parser)  # map the data again
print('red2...')
toydoop.mapreduce([tmp1, tmp2], sample_file, reducer=reducer2, formatter=formatter)

print('training...')
args = BPRArgs()
args.learning_rate = 0.3
num_factors = 10
model = BPR(num_factors, args)
sampler = ExternalSchedule(sample_file, index_offset=1)  # schedule is one-indexed
num_iters = 10
model.train(data, sampler, num_iters)
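The expected input format follows directly from the parsing above: one whitespace-separated, one-indexed (user, item, value) triple per line, e.g.:

1 3 5.0
1 7 4.0
2 3 2.0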
Code example #9
lam = .9
lr = .1

from bpr import BPRArgs, BPR

lrs = []
tests_loss = []
for lam in [.1, .2, .3, .4, .5, .6, .7, .8, .9]:
    print(lam)
    args = BPRArgs(bias_regularization=lam,
                   user_regularization=lam,
                   positive_item_regularization=lam,
                   negative_item_regularization=lam,
                   learning_rate=lr)
    K = 10
    model = BPR(K, args)

    num_iters = 3
    losses = model.train(data, sampler, num_iters)
    test_loss = model.auc_w_sampler(test_set)
    print "test acu: %f" % test_loss

    lrs.append(losses[-1])
    tests_loss.append(test_loss)

    plt.plot(losses)

# Plot the held-out AUC across all regularization values once, after the sweep.
plt.plot(tests_loss)
plt.show()
#
# fn="bpr-breg%.2f-ureg%.2f-lr%.2f-k%d-epochs%d"%(args.bias_regularization, args.user_regularization, args.learning_rate, K, num_iters)
# mdl.save_model(fn,model.item_bias, model.user_factors, model.item_factors)
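A small follow-up the script stops short of: picking the regularization value with the best held-out score (lams below mirrors the loop's list, and this assumes auc_w_sampler returns an AUC, where higher is better):

lams = [.1, .2, .3, .4, .5, .6, .7, .8, .9]
# Pair each test AUC with its lambda and keep the best pair.
best_auc, best_lam = max(zip(tests_loss, lams))
print("best lambda: %.1f (test auc: %.4f)" % (best_lam, best_auc))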