Example #1
# Imports assumed by this snippet (NCF and Metrics are project modules):
import os
import time

import numpy as np
import tensorflow as tf  # TF 1.x API

import Metrics
import NCF

FLAGS = tf.app.flags.FLAGS


def train(train_data, test_data, user_size, item_size):
    with tf.Session() as sess:
        iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                                   train_data.output_shapes)

        model = NCF.NCF(FLAGS.embedding_size, user_size, item_size, FLAGS.lr,
                        FLAGS.optim, FLAGS.initializer, FLAGS.loss_func, FLAGS.activation,
                        FLAGS.regularizer, iterator, FLAGS.topK, FLAGS.dropout, is_training=True)

        model.build()

        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Creating model with fresh parameters.")
            sess.run(tf.global_variables_initializer())
        count = 0
        for epoch in range(FLAGS.epochs):
            sess.run(model.iterator.make_initializer(train_data))
            model.is_training = True
            model.get_data()
            start_time = time.time()

            try:
                while True:
                    model.step(sess, count)
                    count += 1
            except tf.errors.OutOfRangeError:
                print("Epoch %d training " % epoch + "Took: " + time.strftime("%H: %M: %S",
                                                                              time.gmtime(time.time() - start_time)))

            sess.run(model.iterator.make_initializer(test_data))
            model.is_training = False
            model.get_data()
            start_time = time.time()
            HR, MRR, NDCG = [], [], []
            try:
                while True:
                    prediction, label = model.step(sess, None)

                    label = int(label[0])
                    HR.append(Metrics.hit(label, prediction))
                    MRR.append(Metrics.mrr(label, prediction))
                    NDCG.append(Metrics.ndcg(label, prediction))
            except tf.errors.OutOfRangeError:
                hr = np.array(HR).mean()
                mrr = np.array(MRR).mean()
                ndcg = np.array(NDCG).mean()
                print("Epoch %d testing  " % epoch + "Took: " + time.strftime("%H: %M: %S",
                                                                              time.gmtime(time.time() - start_time)))
                print("HR is %.3f, MRR is %.3f, NDCG is %.3f" % (hr, mrr, ndcg))

        ################################## SAVE MODEL ################################
        checkpoint_path = os.path.join(FLAGS.model_dir, "NCF.ckpt")
        model.saver.save(sess, checkpoint_path)
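Example #1 (and the near-identical Example #4 below) calls into a Metrics module that is not shown. As a reference, here is a minimal sketch of what these leave-one-out helpers conventionally compute, assuming prediction is the ranked top-K item list and label the held-out item id:

import numpy as np

def hit(gt_item, pred_items):
    # 1 if the held-out item appears anywhere in the top-K list
    return 1 if gt_item in pred_items else 0

def mrr(gt_item, pred_items):
    # reciprocal rank of the held-out item, 0 if it was not retrieved
    if gt_item in pred_items:
        return 1.0 / (list(pred_items).index(gt_item) + 1)
    return 0.0

def ndcg(gt_item, pred_items):
    # with a single relevant item, NDCG reduces to 1 / log2(rank + 1)
    if gt_item in pred_items:
        return 1.0 / np.log2(list(pred_items).index(gt_item) + 2)
    return 0.0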
Example #2
# Imports assumed by this snippet (NCF and metrics are project modules):
import sys
import time

import numpy as np
import tensorflow as tf  # TF 1.x API

import metrics
import NCF

FLAGS = tf.app.flags.FLAGS


def infer(train_data, test_data, user_size, item_size):
	config = tf.ConfigProto()
	config.gpu_options.allow_growth = True

	with tf.Session(config=config) as sess:

		############################### CREATE MODEL #############################
		iterator = tf.data.Iterator.from_structure(train_data.output_types, 
								train_data.output_shapes)
		model = NCF.NCF(FLAGS.embedding_size, user_size, item_size,	FLAGS.lr, 
				FLAGS.optim, FLAGS.initializer, FLAGS.loss_func, FLAGS.activation, 
				FLAGS.regularizer, iterator, FLAGS.topK, FLAGS.dropout, is_training=True)
		model.build()
		# train_init_op = iterator.make_initializer(train_data)

		ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
		if ckpt and ckpt.model_checkpoint_path:
			print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
			model.saver.restore(sess, ckpt.model_checkpoint_path)
		else:
			print("model files do not exist")
			exit(1)
		
		############################### INFERENCE ###################################

		total_time = 0
		count = 0
		for epoch in range(FLAGS.epochs):
			 
		################################ EVALUATION ##################################
			sess.run(model.iterator.make_initializer(test_data))
			model.is_training = False
			HR, MRR, NDCG = [], [], []
			start_time = time.time()
			try:
				while True:
					prediction, label = model.step(sess, None)
					count = count + 1

					label = int(label[0])
					HR.append(metrics.hit(label, prediction))
					MRR.append(metrics.mrr(label, prediction))
					NDCG.append(metrics.ndcg(label, prediction))
			except tf.errors.OutOfRangeError:
				hr = np.array(HR).mean()
				mrr = np.array(MRR).mean()
				ndcg = np.array(NDCG).mean()
				print("Epoch %d testing  " %epoch + "Took: " + time.strftime("%H: %M: %S", 
									time.gmtime(time.time() - start_time)))
				print("HR is %.3f, MRR is %.3f, NDCG is %.3f" %(hr, mrr, ndcg))
			total_time += time.time() - start_time
		print("Total Epochs: %d on inference " %(epoch+1))
		print("Total recommendations: %d" % (count * FLAGS.batch_size))
		print("Approximate accelerator time in seconds is: %.2f" % total_time)
		print("Approximate accelerator performance in recommendations/second is: %.2f" % (float(count * FLAGS.batch_size)/float(total_time)))
Example #3
            print('epoch', ep, 'step', i, 'loss', loss.item())
        i += 1

        if i % 5000 == 0:
            print('hit ratio', HitRatio())

        if len(user2items) < batch_size:  # really there might be more left, but this is the minimum guaranteed
            ep += 1
            # user2items, item2users = dataset_loader.build_dictionaries()
            user2items, item2users, test_item_pair = dataset_loader.get_dictionaries()


ncf = NCF.NCF_item_item(item_num + 1, 16).cuda()
ncf.join_output_weights()
print('Hit ratio:', HitRatio())

mlp_optimizer = optim.Adam(list(ncf.mlp_item_embeddings.parameters()) +
                           list(ncf.mlp.parameters()) +
                           list(ncf.mlp_out.parameters()),
                           lr=1e-3)
gmf_optimizer = optim.Adam(list(ncf.gmf_item_embeddings.parameters()) +
                           list(ncf.gmf_out.parameters()),
                           lr=1e-3)
ncf_optimizer = optim.Adam(ncf.parameters(), lr=5e-4)

print('\nTraining MLP')
train('mlp', mlp_optimizer, epochs=1)
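The snippet stops after the MLP stage, but the three optimizers imply the staged schedule from the NCF paper: pretrain the MLP and GMF branches separately, then fine-tune the fused model with the lower-learning-rate optimizer. A hypothetical continuation mirroring the call above (the remaining stage names are assumptions):

print('\nTraining GMF')
train('gmf', gmf_optimizer, epochs=1)   # hypothetical: pretrain the GMF branch

print('\nTraining NCF')
train('ncf', ncf_optimizer, epochs=1)   # hypothetical: joint fine-tune at lr=5e-4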
Example #4
# Imports assumed by this snippet (NCF and metrics are project modules):
import os
import time

import numpy as np
import tensorflow as tf  # TF 1.x API

import metrics
import NCF

FLAGS = tf.app.flags.FLAGS


def train(train_data, test_data, n_user, n_item):
    with tf.Session() as sess:
        iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                                   train_data.output_shapes)

        model = NCF.NCF(FLAGS.embedding_size, n_user, n_item, FLAGS.lr,
                        FLAGS.optim, FLAGS.initializer, FLAGS.loss_func, FLAGS.activation,
                        FLAGS.regularizer, iterator, FLAGS.topK, FLAGS.dropout, is_training=True)

        model.build()

        # Restore saved parameters if a checkpoint exists, otherwise train from scratch
        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
            # Load the saved model parameters
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Creating model with fresh parameters.")
            sess.run(tf.global_variables_initializer())

        count = 0
        # Train for FLAGS.epochs passes over the training set
        for epoch in range(FLAGS.epochs):
            # Point the iterator at the training set
            sess.run(model.iterator.make_initializer(train_data))
            model.is_training = True
            model.get_data()
            start_time = time.time()

            try:
                while True:  # until the iterator runs dry, i.e. one full pass over the training data

                    model.step(sess, count)
                    count += 1
            except tf.errors.OutOfRangeError:
                # Report how long this training epoch took
                print("Epoch %d training took %s" % (
                    epoch, time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time))))
            # Re-point the iterator at the test set
            sess.run(model.iterator.make_initializer(test_data))
            model.is_training = False
            model.get_data()
            start_time = time.time()
            HR, MRR, NDCG = [], [], []
            try:
                while True:  # until the iterator runs dry, i.e. one full pass over the test data
                    pred_item, gt_item = model.step(sess, None)
                    # Every row in a test batch shares the same ground-truth item, so take the first
                    gt_item = int(gt_item[0])
                    HR.append(metrics.hit(gt_item, pred_item))
                    MRR.append(metrics.mrr(gt_item, pred_item))
                    NDCG.append(metrics.ndcg(gt_item, pred_item))
            except tf.errors.OutOfRangeError:
                # Average the metrics over the whole test pass
                hr = np.array(HR).mean()
                mrr = np.array(MRR).mean()
                ndcg = np.array(NDCG).mean()
                print("Epoch %d testing  " % epoch + "Took: " + time.strftime("%H: %M: %S",
                                                                              time.gmtime(time.time() - start_time)))
                print("HR is %.3f, MRR is %.3f, NDCG is %.3f" % (hr, mrr, ndcg))

        # Save the model parameters
        checkpoint_path = os.path.join(FLAGS.model_dir, "NCF.ckpt")
        model.saver.save(sess, checkpoint_path)
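The translated comment above is the key to the gt_item = int(gt_item[0]) line: in leave-one-out evaluation a test batch holds one user's held-out positive plus its sampled negatives, so the label column repeats a single item id. A small illustration with made-up numbers:

import numpy as np

pos_item = 42                                          # held-out positive (made up)
negatives = np.random.choice(1000, 99, replace=False)  # 99 sampled negatives
items = np.concatenate([[pos_item], negatives])        # 100 candidates for one user
labels = np.full(items.shape, pos_item)                # the label column is constant
assert int(labels[0]) == pos_item                      # hence taking element 0 is safe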
Example #5
        model.compile(optimizer=Adam(lr=learning_rate),
                      loss='binary_crossentropy')
    else:
        model.compile(optimizer=SGD(lr=learning_rate),
                      loss='binary_crossentropy')

    # Load pretrained models
    if PMF_pretrain != '' and NCF_pretrain != '':
        PMF_model = PMF.get_model(train, num_users, num_items,
                                  commonuser + PMFlayers1,
                                  commonitem + PMFlayers1, PMFlayers2)
        PMF_model.load_weights(PMF_pretrain)
        model = load_pretrain_PMF(model, PMF_model, commonuser, PMFlayers1,
                                  PMFlayers2)
        del PMF_model
        NCF_model = NCF.get_model(train, num_users, num_items, commonuser,
                                  commonitem, NCFlayers)
        NCF_model.load_weights(NCF_pretrain)
        model = load_pretrain_NCF(model, NCF_model, commonuser, NCFlayers,
                                  mixing_rate)
        del NCF_model
        print("Load pretrained PMF (%s) and NCF (%s) models done. " %
              (PMF_pretrain, NCF_pretrain))

    # Check Init performance
    (hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, topK,
                                   evaluation_threads)
    hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
    print('Init: HR = %.4f, NDCG = %.4f' % (hr, ndcg))
    best_hr, best_ndcg, best_iter = hr, ndcg, -1
    if args.out > 0:
        model.save_weights(model_out_file, overwrite=True)
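evaluate_model follows the standard NCF leave-one-out protocol: each test rating's held-out item is scored against its pre-sampled negatives, and HR@K / NDCG@K are read off the top-K ranking. A minimal single-threaded sketch of scoring one rating (the two-input model.predict layout is an assumption based on the Keras models above):

import heapq
import numpy as np

def eval_one_rating(model, user, gt_item, negatives, topK):
    items = list(negatives) + [gt_item]
    users = np.full(len(items), user, dtype='int32')
    scores = model.predict([users, np.array(items)],
                           batch_size=len(items), verbose=0)
    item_scores = dict(zip(items, scores.flatten()))
    ranklist = heapq.nlargest(topK, item_scores, key=item_scores.get)
    if gt_item in ranklist:
        # hit, plus NDCG for a single relevant item at this rank
        return 1, 1.0 / np.log2(ranklist.index(gt_item) + 2)
    return 0, 0.0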
Example #6
    task = 0   # 0 selects the training branch below, anything else the test branch
    model = 0  # 0 selects the baseline models, 1 the SimpleModelDecoder
    mod = 2
    iter = 0   # note: shadows the built-in iter()
    print("1:JNTM 2:SERM 3:NCF 4:DSSM")
    temp = int(input('which baseline? '))
    if task == 0:
        if model == 0:
            #attention_model_enhance.train(dl,small_path, dataset, iter_start=iter, mod=mod)
            if temp == 1:
                #JNTM.train(dl,3555,500)
                JNTM.train(dl, small_path)
            elif temp == 2:
                SERM.train(dl, small_path)
            elif temp == 3:
                NCF.train(dl, small_path, dataset)
            elif temp == 4:
                DSSM.train(dl, small_path, dataset)

        elif model == 1:
            SimpleModelDecoder.train(dl, small_path, dataset,
                                     iter_start=iter, mod=mod)
    else:
        if model == 0:
            attention_model_enhance.test(dl, small_path, dataset,
                                         iter_start=iter, mod=mod)
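As a design note, the if/elif ladder above maps the menu choice to a baseline trainer; the same mapping can be written as a dispatch table (lambdas because the trainers take different argument lists):

baselines = {
    1: lambda: JNTM.train(dl, small_path),
    2: lambda: SERM.train(dl, small_path),
    3: lambda: NCF.train(dl, small_path, dataset),
    4: lambda: DSSM.train(dl, small_path, dataset),
}
baselines[temp]()  # same effect as the chain above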
Example #7
            print('epoch', ep, 'step', i, 'loss', loss.item())
        i += 1

        if i % 5000 == 0:
            print('hit ratio', HitRatio())

        if len(user2items) < batch_size:  # really there might be more left, but this is the minimum guaranteed
            ep += 1
            # user2items, item2users = dataset_loader.build_dictionaries()
            user2items, item2users, test_user2items = dataset_loader.get_dictionaries()


ncf = NCF.NeuralCollaborativeFiltering(user_num + 1, item_num + 1, 32).cuda()
ncf.join_output_weights()
print('Hit ratio:', HitRatio())

mlp_optimizer = optim.Adam(list(ncf.mlp_item_embeddings.parameters()) +
                           list(ncf.mlp_user_embeddings.parameters()) +
                           list(ncf.mlp.parameters()) +
                           list(ncf.mlp_out.parameters()),
                           lr=1e-3)
gmf_optimizer = optim.Adam(list(ncf.gmf_item_embeddings.parameters()) +
                           list(ncf.gmf_user_embeddings.parameters()) +
                           list(ncf.gmf_out.parameters()),
                           lr=1e-3)
ncf_optimizer = optim.Adam(ncf.parameters(), lr=5e-4)

print('\nTraining MLP')
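Example #7 reports HitRatio() without defining it. A heavily hedged sketch of what such a helper usually computes in this setting, assuming test_user2items maps a user id to its held-out item and that ncf(users, items) scores (user, item) pairs; the real implementation is not part of this snippet:

import torch

def HitRatio(k=10, num_negatives=99):
    # Hedged sketch only: rank each held-out positive against sampled negatives
    hits = 0
    with torch.no_grad():
        for user, pos_item in test_user2items.items():
            negatives = torch.randint(0, item_num + 1, (num_negatives,))
            items = torch.cat([torch.tensor([pos_item]), negatives]).cuda()
            users = torch.full_like(items, user)
            scores = ncf(users, items)
            if 0 in scores.topk(k).indices:   # the positive sits at index 0
                hits += 1
    return hits / len(test_user2items)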
Example #8
device = torch.device('cpu')  # presumably the fallback arm of a device-selection block cut from this excerpt
# logger.info('{} Using device: {}'.format(__name__, device))
print('{} Using device: {}'.format(__name__, device))

# Seeding
if args.seed is not None:
    # logger.info('{} Setting random seed'.format(__name__))
    print('{} Setting random seed'.format(__name__))
    seed = args.seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

model = NCF.NCF(user_num, item_num, args.factor_num, args.num_layers,
                args.dropout, config.config['model_cur'], GMF_model,
                MLP_model).to(device)

# loss_function = nn.CrossEntropyLoss()
criterion = nn.BCEWithLogitsLoss()
# optimizer = optim.SGD(model.parameters(), lr=0.001)
optimizer = optim.Adam(model.parameters(), lr=0.0005)

best_HitRate = 0.0
best_HitRate_NDCG = 0.0  # Without normalization
best_HitRate_epoch = 0

# add log writer for tensorboardX
writer = SummaryWriter('log')

for epoch in range(args.epoch_num):
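    # The snippet ends at the loop header above; what follows is a
    # hypothetical epoch body under common assumptions: a train_loader
    # yielding (user, item, label) batches (not defined in this excerpt),
    # with model, criterion, optimizer, and writer from the lines above.
    model.train()
    for user, item, label in train_loader:
        user, item = user.to(device), item.to(device)
        label = label.float().to(device)

        optimizer.zero_grad()
        prediction = model(user, item)       # model outputs raw logits
        loss = criterion(prediction, label)  # BCEWithLogitsLoss expects logits
        loss.backward()
        optimizer.step()

    writer.add_scalar('loss/train', loss.item(), epoch)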