Example #1
0
def main():
    """Train a Heater model on user content and track recall@k.

    Hyper-parameters are read from the module-level ``args`` and the dataset
    from ``load_data``.  Every epoch draws fresh negative samples, runs one
    pass of mini-batch training and decays the learning rate every
    ``_decay_lr_every`` epochs.  Every ``eval_every`` epochs the validation
    split is scored with ``heater.eval_preds_cold``; when the summed
    validation recall improves, the test split is scored as well and kept as
    the best result.  A summary is printed after every epoch.

    Raises:
        Exception: if the accumulated training loss becomes NaN.
    """
    data_name = args.data
    model_select = args.model_select
    rank_out = args.rank
    data_batch_size = 1024
    dropout = args.dropout
    recall_at = [20, 50, 100]
    eval_batch_size = 5000  # the batch size when test
    eval_every = args.eval_every
    num_epoch = 100
    neg = args.neg

    _lr = args.lr
    _decay_lr_every = 2
    _lr_decay = 0.9

    dat = load_data(data_name)
    u_pref = dat['u_pref']
    v_pref = dat['v_pref']
    test_eval = dat['test_eval']
    vali_eval = dat['vali_eval']
    user_content = dat['user_content']
    user_list = dat['user_list']
    item_list = dat['item_list']
    item_warm = np.unique(item_list)
    timer = utils.timer(name='main').tic()

    # prep eval
    timer.tic()
    test_eval.init_tf(u_pref,
                      v_pref,
                      user_content,
                      None,
                      eval_batch_size,
                      cold_user=True)  # init data for evaluation
    vali_eval.init_tf(u_pref,
                      v_pref,
                      user_content,
                      None,
                      eval_batch_size,
                      cold_user=True)  # init data for evaluation
    timer.toc('initialized eval data').tic()

    heater = model.Heater(latent_rank_in=u_pref.shape[1],
                          user_content_rank=user_content.shape[1],
                          item_content_rank=0,
                          model_select=model_select,
                          rank_out=rank_out,
                          reg=args.reg,
                          alpha=args.alpha,
                          dim=args.dim)
    heater.build_model()
    heater.build_predictor(recall_at)

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        timer.toc('initialized tf')

        # Start every reported metric as a zero vector (one entry per cutoff)
        # so the per-epoch printout works even before the first improvement.
        n_cutoffs = len(recall_at)
        best_epoch = 0
        best_recall = np.zeros(n_cutoffs)  # val
        best_test_recall = np.zeros(n_cutoffs)  # test
        recall = precision = ndcg = np.zeros(n_cutoffs)
        test_recall = test_precision = test_ndcg = np.zeros(n_cutoffs)

        for epoch in range(num_epoch):
            user_array, item_array, target_array = utils.negative_sampling(
                user_list, item_list, neg, item_warm)
            random_idx = np.random.permutation(user_array.shape[0])
            n_targets = len(random_idx)
            data_batch = [(n, min(n + data_batch_size, n_targets))
                          for n in range(0, n_targets, data_batch_size)]
            # Avoid dividing by zero in the epoch summary if there is no data.
            n_batches = max(len(data_batch), 1)
            loss_epoch = 0.
            reg_loss_epoch = 0.
            diff_loss_epoch = 0.
            rec_loss_epoch = 0.
            for (start, stop) in data_batch:

                batch_idx = random_idx[start:stop]
                batch_users = user_array[batch_idx]
                batch_items = item_array[batch_idx]
                batch_targets = target_array[batch_idx]

                # content
                user_content_batch = user_content[batch_users, :].todense()
                # dropout: mark a random subset of rows in the batch with 1
                if dropout != 0:
                    n_to_drop = int(np.floor(
                        dropout *
                        len(batch_idx)))  # number of u-i pairs to be dropped
                    zero_index = np.random.choice(np.arange(len(batch_idx)),
                                                  n_to_drop,
                                                  replace=False)
                else:
                    zero_index = np.array([])
                dropout_indicator = np.zeros_like(batch_targets).reshape(
                    (-1, 1))
                # np.array([]) is float-typed and cannot be used as an index,
                # so only index when something was actually selected.
                if len(zero_index) > 0:
                    dropout_indicator[zero_index] = 1

                _, _, loss_out, rec_loss_out, reg_loss_out, diff_loss_out = sess.run(
                    [
                        heater.preds, heater.optimizer, heater.loss,
                        heater.rec_loss, heater.reg_loss, heater.diff_loss
                    ],
                    feed_dict={
                        heater.Uin: u_pref[batch_users, :],
                        heater.Vin: v_pref[batch_items, :],
                        heater.Ucontent: user_content_batch,
                        heater.dropout_user_indicator: dropout_indicator,
                        heater.target: batch_targets,
                        heater.lr_placeholder: _lr,
                        heater.is_training: True
                    })
                loss_epoch += loss_out
                # Accumulate into the epoch total (previously the batch value
                # was added to itself and the total stayed at zero).
                rec_loss_epoch += rec_loss_out
                reg_loss_epoch += reg_loss_out
                diff_loss_epoch += diff_loss_out
                if np.isnan(loss_epoch):
                    raise Exception('f is nan')

            if (epoch + 1) % _decay_lr_every == 0:
                _lr = _lr_decay * _lr
                print('decayed lr:' + str(_lr))

            if epoch % eval_every == 0:
                recall, precision, ndcg = utils.batch_eval_recall(
                    sess,
                    heater.eval_preds_cold,
                    eval_feed_dict=heater.get_eval_dict,
                    recall_k=recall_at,
                    eval_data=vali_eval)

                # checkpoint: only a fresh validation result can beat the best
                if np.sum(recall) > np.sum(best_recall):
                    best_recall = recall
                    test_recall, test_precision, test_ndcg = utils.batch_eval_recall(
                        sess,
                        heater.eval_preds_cold,
                        eval_feed_dict=heater.get_eval_dict,
                        recall_k=recall_at,
                        eval_data=test_eval)
                    best_test_recall = test_recall
                    best_epoch = epoch

            # print results at every epoch
            timer.toc(
                '%d loss=%.4f reg_loss=%.4f diff_loss=%.4f rec_loss=%.4f' %
                (epoch, loss_epoch / n_batches, reg_loss_epoch / n_batches,
                 diff_loss_epoch / n_batches,
                 rec_loss_epoch / n_batches)).tic()
            print('\t\t\t' + '\t '.join([
                ('@' + str(i)).ljust(6) for i in recall_at
            ]))  # ljust: padding to fixed len
            print('Current recall\t\t%s' %
                  (' '.join(['%.6f' % i for i in recall])))
            print('Current precision\t%s' %
                  (' '.join(['%.6f' % i for i in precision])))
            print('Current ndcg\t\t%s' % (' '.join(['%.6f' % i
                                                    for i in ndcg])))
            print('Current test recall\t%s' %
                  (' '.join(['%.6f' % i for i in test_recall])))
            print('Current test precision\t%s' %
                  (' '.join(['%.6f' % i for i in test_precision])))
            print('Current test ndcg\t%s' %
                  (' '.join(['%.6f' % i for i in test_ndcg])))
            print('best[%d] vali recall:\t%s' %
                  (best_epoch, ' '.join(['%.6f' % i for i in best_recall])))
            print('best[%d] test recall:\t%s' %
                  (best_epoch, ' '.join(['%.6f' % i
                                         for i in best_test_recall])))
Example #2
0
def main():
    """Train a Heater model on user content with early stopping on validation AUC.

    Settings come from the module-level ``args`` and the data from
    ``load_data``.  Before training, the warm test split is scored once with
    ``heater.eval_preds_warm``.  Each epoch then resamples negatives, trains
    on shuffled mini-batches, decays the learning rate on schedule and scores
    the validation split.  When the validation AUC improves, the warm and
    cold test splits are re-scored and remembered.  Training stops once more
    than 10 epochs pass without improvement.

    Raises:
        Exception: if the accumulated training loss becomes NaN.
    """
    # Fixed settings.
    data_batch_size = 1024
    eval_batch_size = 5000  # batch size used while evaluating
    num_epoch = 100
    _decay_lr_every = 2
    _lr_decay = 0.9

    # Settings supplied through ``args``.
    data_name = args.data
    model_select = args.model_select
    rank_out = args.rank
    dropout = args.dropout
    neg = args.neg
    _lr = args.lr

    dataset = load_data(data_name)
    u_pref = dataset['u_pref']  # user preference vectors fed to heater.Uin
    v_pref = dataset['v_pref']  # item preference vectors fed to heater.Vin
    user_content = dataset['user_content']  # user content; rows are densified per batch
    test_eval = dataset['test_eval']  # EvalData
    val_eval = dataset['val_eval']  # EvalData
    warm_test_eval = dataset['warm_test']  # EvalData
    user_list = dataset['user_list']  # users of train interactions
    item_list = dataset['item_list']  # items of train interactions
    item_warm = np.unique(item_list)  # distinct items seen in training
    timer = utils.timer(name='main')

    # Prepare the evaluation splits (same arguments for each, in this order).
    timer.tic()
    for eval_split in (test_eval, val_eval, warm_test_eval):
        eval_split.init_tf(u_pref, v_pref, user_content, None, eval_batch_size, cold_user=True)
    timer.toc('initialized eval data').tic()

    heater = model.Heater(
        latent_rank_in=u_pref.shape[1],
        user_content_rank=user_content.shape[1],
        item_content_rank=0,
        model_select=model_select,
        rank_out=rank_out,
        reg=args.reg,
        alpha=args.alpha,
        dim=args.dim,
    )
    heater.build_model()
    heater.build_predictor()

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        timer.toc('initialized tf')

        # Score of the untouched inputs on the warm test split, for reference.
        org_warm_test = utils.batch_eval(
            sess,
            heater.eval_preds_warm,
            eval_feed_dict=heater.get_eval_dict,
            eval_data=warm_test_eval,
            metric=dataset['metric']['warm_test'],
            warm=True,
        )

        # Column header for the per-epoch report, padded to a fixed width.
        metric_header = '\t\t\t\t\t' + '\t '.join(
            name.ljust(6) for name in ('auc', 'hr', 'ndcg'))

        best_epoch = 0
        patience = 0
        val_auc = 0.
        best_val_auc = 0.
        best_warm_test = np.zeros(3)
        best_cold_test = np.zeros(3)
        for epoch in range(num_epoch):
            user_array, item_array, target_array = utils.negative_sampling(
                user_list, item_list, neg, item_warm)
            shuffled = np.random.permutation(user_array.shape[0])
            n_targets = len(shuffled)
            batch_starts = range(0, n_targets, data_batch_size)
            n_batches = len(batch_starts)

            loss_epoch = reg_loss_epoch = diff_loss_epoch = rec_loss_epoch = 0.
            for start in batch_starts:
                stop = min(start + data_batch_size, n_targets)
                batch_idx = shuffled[start:stop]
                batch_size = len(batch_idx)
                batch_users = user_array[batch_idx]
                batch_items = item_array[batch_idx]
                batch_targets = target_array[batch_idx]

                # Pick the rows of this batch that get their indicator set to 1.
                if dropout != 0:
                    n_to_drop = int(np.floor(dropout * batch_size))
                    zero_index = np.random.choice(
                        np.arange(batch_size), n_to_drop, replace=False)
                else:
                    zero_index = np.array([])

                dropout_indicator = np.zeros_like(batch_targets).reshape((-1, 1))
                if zero_index.size > 0:
                    dropout_indicator[zero_index] = 1

                fetches = [
                    heater.preds, heater.optimizer, heater.loss,
                    heater.rec_loss, heater.reg_loss, heater.diff_loss,
                ]
                feed = {
                    heater.Uin: u_pref[batch_users, :],
                    heater.Vin: v_pref[batch_items, :],
                    heater.Ucontent: user_content[batch_users, :].todense(),
                    heater.dropout_user_indicator: dropout_indicator,
                    heater.target: batch_targets,
                    heater.lr_placeholder: _lr,
                    heater.is_training: True,
                }
                fetched = sess.run(fetches, feed_dict=feed)
                loss_out, rec_loss_out, reg_loss_out, diff_loss_out = fetched[2:]

                loss_epoch += loss_out
                rec_loss_epoch += rec_loss_out
                reg_loss_epoch += reg_loss_out
                diff_loss_epoch += diff_loss_out
                if np.isnan(loss_epoch):
                    raise Exception('f is nan')

            if (epoch + 1) % _decay_lr_every == 0:
                _lr = _lr_decay * _lr
                print('decayed lr:' + str(_lr))

            val_auc = utils.batch_eval_auc(
                sess,
                heater.eval_preds_cold,
                eval_feed_dict=heater.get_eval_dict,
                eval_data=val_eval,
            )

            # New best on validation: refresh both test scores.
            if val_auc > best_val_auc:
                patience = 0
                best_val_auc = val_auc
                best_warm_test = utils.batch_eval(
                    sess,
                    heater.eval_preds_cold,
                    eval_feed_dict=heater.get_eval_dict,
                    eval_data=warm_test_eval,
                    metric=dataset['metric']['warm_test'],
                    warm=True,
                )
                best_cold_test = utils.batch_eval(
                    sess,
                    heater.eval_preds_cold,
                    eval_feed_dict=heater.get_eval_dict,
                    eval_data=test_eval,
                    metric=dataset['metric']['cold_test'],
                )
                best_epoch = epoch

            # Per-epoch report: mean losses followed by the tracked metrics.
            epoch_means = (
                epoch,
                loss_epoch / n_batches,
                reg_loss_epoch / n_batches,
                diff_loss_epoch / n_batches,
                rec_loss_epoch / n_batches,
            )
            timer.toc('%d loss=%.4f reg_loss=%.4f diff_loss=%.4f rec_loss=%.4f' % epoch_means).tic()
            print('Current val auc:%.4f\tbest:%.4f' % (val_auc, best_val_auc))
            print(metric_header)
            print('origin warm test:\t%s' % ' '.join('%.6f' % v for v in org_warm_test))
            print('best[%d] warm test:\t%s' % (best_epoch, ' '.join('%.6f' % v for v in best_warm_test)))
            print('best[%d] cold test:\t%s' % (best_epoch, ' '.join('%.6f' % v for v in best_cold_test)))

            # Early stop after more than 10 epochs without a new best.
            patience += 1
            if patience > 10:
                print(f"Early stop at epoch {epoch}")
                break
# Load the dataset and unpack the pieces used by the item-content model.
dat = load_data(data_name)
u_pref = dat['u_pref']  # all user pre embedding
v_pref = dat['v_pref']  # all item pre embedding
item_content = dat['item_content'].todense()  # item content matrix, densified once up front
test_eval = dat['cold_eval']  # EvalData
val_eval = dat['val_eval']  # EvalData
warm_test_eval = dat['warm_eval']  # EvalData
user_list = dat['user_list']  # users of train interactions
item_list = dat['item_list']  # items of train interactions
item_warm = np.unique(item_list)  # train item set

timer = utils.timer(name='main').tic()
# build model: item content only, so the user content rank is 0
heater = model.Heater(latent_rank_in=u_pref.shape[1],
                      user_content_rank=0,
                      item_content_rank=item_content.shape[1],
                      model_select=model_select, rank_out=rank_out,
                      reg=args.reg, alpha=args.alpha, dim=args.dim)
heater.build_model()
heater.build_predictor()

saver = tf.train.Saver()
save_path = './model_save/'
# Create the checkpoint directory in one step; exist_ok avoids the
# check-then-create race when the directory appears in between.
os.makedirs(save_path, exist_ok=True)
config = tf.ConfigProto(allow_soft_placement=True)
with tf.Session(config=config) as sess:
    tf.global_variables_initializer().run()
    tf.local_variables_initializer().run()
    timer.toc('initialized tf').tic()
Example #4
0
def main():
    """Train a Heater model on item content and report warm/cold test metrics.

    Settings come from the module-level ``args`` and the data from
    ``load_data``.  Each epoch resamples negatives, trains on shuffled
    mini-batches, decays the learning rate on schedule and scores the
    validation split.  The model is saved whenever the validation score
    improves, and training stops once more than 10 epochs pass without
    improvement.  Afterwards the saved checkpoint is restored and scored on
    the warm and cold test splits.

    Raises:
        Exception: if the accumulated training loss becomes NaN.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    data_name = args.data
    data_batch_size = 1024  # train batch size
    dropout = args.dropout
    num_epoch = 1000
    neg = args.neg  # negative sampling rate
    _lr = args.lr
    _decay_lr_every = 10
    _lr_decay = 0.8

    dat = load_data(data_name)
    u_pref = dat['u_pref']  # all user pre embedding
    v_pref = dat['v_pref']  # all item pre embedding
    item_content = dat['item_content']  # item content; rows are densified per batch
    item_fake_pref = dat['item_fake_pref']  # indexed as [item, 1 or 2, :] below
    test_eval = dat['cold_eval']  # EvalData
    val_eval = dat['val_eval']  # EvalData
    warm_test_eval = dat['warm_eval']  # EvalData
    user_list = dat['user_list']  # users of train interactions
    item_list = dat['item_list']  # items of train interactions
    item_warm = np.unique(item_list)  # train item set

    timer = utils.timer(name='main').tic()
    # build model
    heater = model.Heater(latent_rank_in=u_pref.shape[-1],
                          user_content_rank=0,
                          item_content_rank=item_content.shape[1],
                          args=args)
    heater.build_model()
    heater.build_predictor()

    saver = tf.train.Saver()
    save_path = './model_save/'
    # exist_ok avoids the check-then-create race on the checkpoint directory.
    os.makedirs(save_path, exist_ok=True)
    # Single source of truth for the checkpoint location (save and restore).
    ckpt_path = save_path + args.data + args.warm_model
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        timer.toc('initialized tf').tic()

        best_epoch = 0
        patience = 0
        val_auc, best_val_auc = 0., 0.
        for epoch in range(num_epoch):
            user_array, item_array, target_array = utils.negative_sampling(
                user_list, item_list, neg, item_warm)
            # shuffled range of sample positions, used as batch indices
            random_idx = np.random.permutation(user_array.shape[0])
            data_batch = [(n, min(n + data_batch_size, len(random_idx)))
                          for n in range(0, len(random_idx), data_batch_size)]
            loss_epoch = 0.
            reg_loss_epoch = 0.
            diff_loss_epoch = 0.
            rec_loss_epoch = 0.
            for (start, stop) in data_batch:

                batch_idx = random_idx[start:stop]
                batch_users = user_array[batch_idx]
                batch_items = item_array[batch_idx]
                batch_targets = target_array[batch_idx]

                # content
                item_content_batch = item_content[batch_items, :].todense()
                # dropout: used in randomized training
                # indicator's target is the CF pretrain rep
                # set the dropped rows' position in indicator to be 1
                if dropout != 0:
                    n_to_drop = int(np.floor(
                        dropout *
                        len(batch_idx)))  # number of u-i pairs to be dropped
                    zero_index = np.random.choice(np.arange(len(batch_idx)),
                                                  n_to_drop,
                                                  replace=False)
                else:
                    # Integer dtype: a default (float) empty array is rejected
                    # by NumPy when used as an index.
                    zero_index = np.array([], dtype=np.int64)
                dropout_indicator = np.zeros_like(batch_targets).reshape(
                    (-1, 1))
                if len(zero_index) > 0:
                    dropout_indicator[zero_index] = 1

                _, _, loss_out, rec_loss_out, reg_loss_out, diff_loss_out = sess.run(
                    [
                        heater.preds, heater.optimizer, heater.loss,
                        heater.rec_loss, heater.reg_loss, heater.diff_loss
                    ],
                    feed_dict={
                        heater.Uin: u_pref[batch_users, :],
                        heater.Vin: v_pref[batch_items, :],
                        heater.Vcontent: item_content_batch,
                        heater.fake_v_nei_1: item_fake_pref[batch_items, 1, :],
                        heater.fake_v_nei_2: item_fake_pref[batch_items, 2, :],
                        heater.dropout_item_indicator: dropout_indicator,
                        heater.target: batch_targets,
                        heater.lr_placeholder: _lr,
                        heater.is_training: True
                    })
                loss_epoch += loss_out
                rec_loss_epoch += rec_loss_out
                reg_loss_epoch += reg_loss_out
                diff_loss_epoch += diff_loss_out
                if np.isnan(loss_epoch):
                    raise Exception('f is nan')

            timer.toc(
                '%d loss=%.4f reg_loss=%.4f diff_loss=%.4f rec_loss=%.4f' %
                (epoch, loss_epoch / len(data_batch), reg_loss_epoch /
                 len(data_batch), diff_loss_epoch / len(data_batch),
                 rec_loss_epoch / len(data_batch))).tic()
            if (epoch + 1) % _decay_lr_every == 0:
                _lr = _lr_decay * _lr
                print('decayed lr:' + str(_lr))

            # eval on val
            val_auc = utils.batch_eval(sess,
                                       heater.eval_preds_cold,
                                       eval_feed_dict=heater.get_eval_dict,
                                       eval_data=val_eval,
                                       U_pref=u_pref,
                                       V_pref=v_pref,
                                       excluded_dict=dat['pos_nb'],
                                       V_content=item_content,
                                       v_fake_pref=item_fake_pref,
                                       val=True)
            # on a better validation result, checkpoint the model and reset
            # the early-stopping counter
            if val_auc > best_val_auc:
                saver.save(sess, ckpt_path)
                patience = 0
                best_val_auc = val_auc
                best_epoch = epoch
            # print val results at every epoch
            timer.toc('[%d/10] Current val auc:%.4f\tbest:%.4f' %
                      (patience, val_auc, best_val_auc)).tic()

            # early stop
            patience += 1
            if patience > 10:
                print(f"Early stop at epoch {epoch}")
                break

        # NOTE(review): a checkpoint is only written when val_auc rises above
        # its initial 0.; if that never happened in this run, this restores
        # whatever already exists at ckpt_path (or fails if nothing does).
        saver.restore(sess, ckpt_path)
        best_warm_test = utils.batch_eval(sess,
                                          heater.eval_preds_cold,
                                          eval_feed_dict=heater.get_eval_dict,
                                          eval_data=warm_test_eval,
                                          U_pref=u_pref,
                                          V_pref=v_pref,
                                          excluded_dict=dat['pos_nb'],
                                          V_content=item_content,
                                          v_fake_pref=item_fake_pref,
                                          metric=dat['metric']['warm_test'],
                                          warm=True)
        best_cold_test = utils.batch_eval(sess,
                                          heater.eval_preds_cold,
                                          eval_feed_dict=heater.get_eval_dict,
                                          eval_data=test_eval,
                                          U_pref=u_pref,
                                          V_pref=v_pref,
                                          excluded_dict=dat['pos_nb'],
                                          V_content=item_content,
                                          v_fake_pref=item_fake_pref,
                                          metric=dat['metric']['cold_test'])
        timer.toc('Test').tic()
        print('\t\t\t\t\t' + '\t '.join(
            [str(i).ljust(6)
             for i in ['auc', 'hr', 'ndcg']]))  # padding to fixed len
        print('best[%d] warm test:\t%s' %
              (best_epoch, ' '.join(['%.6f' % i for i in best_warm_test])))
        print('best[%d] cold test:\t%s' %
              (best_epoch, ' '.join(['%.6f' % i for i in best_cold_test])))