def infer(train_data, test_data, user_size, item_size):
    """Restore a trained NCF model and time repeated evaluation passes.

    The model is rebuilt, its parameters are restored from the checkpoint
    state found in ``FLAGS.model_dir``, and ``test_data`` is then evaluated
    ``FLAGS.epochs`` times. After every pass the mean HR, MRR and NDCG are
    printed; after the last pass a throughput summary is printed.

    Args:
        train_data: Dataset used here only for its ``output_types`` and
            ``output_shapes`` (to define the iterator structure); it is
            never iterated by this function.
        test_data: Dataset that the iterator is re-initialized with at the
            start of every pass.
        user_size: Forwarded to the ``NCF.NCF`` constructor.
        item_size: Forwarded to the ``NCF.NCF`` constructor.

    Raises:
        SystemExit: With status 1 when no checkpoint state is found.
    """
    # Local import keeps this block self-contained; ``sys.exit`` replaces the
    # ``exit`` helper, which is installed by ``site`` and not guaranteed to exist.
    import sys

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        ############################### CREATE MODEL #############################
        iterator = tf.data.Iterator.from_structure(
            train_data.output_types, train_data.output_shapes)
        model = NCF.NCF(
            FLAGS.embedding_size, user_size, item_size, FLAGS.lr,
            FLAGS.optim, FLAGS.initializer, FLAGS.loss_func,
            FLAGS.activation, FLAGS.regularizer, iterator, FLAGS.topK,
            FLAGS.dropout, is_training=True)
        model.build()

        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        if not ckpt:
            print("model files do not exist")
            sys.exit(1)
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)

        ############################### INFERENCE ################################
        total_time = 0
        count = 0  # number of batches evaluated across all passes
        epochs_run = 0  # stays 0 when FLAGS.epochs yields no iterations
        for epoch in range(FLAGS.epochs):
            sess.run(model.iterator.make_initializer(test_data))
            model.is_training = False
            HR, MRR, NDCG = [], [], []
            start_time = time.time()
            try:
                # The loop only ends when the iterator is exhausted and
                # ``model.step`` raises OutOfRangeError.
                while True:
                    prediction, label = model.step(sess, None)
                    count = count + 1
                    # Only the first label of the batch is used as the target
                    # (presumably all rows of a test batch share it -- confirm
                    # against the data pipeline).
                    label = int(label[0])
                    HR.append(metrics.hit(label, prediction))
                    MRR.append(metrics.mrr(label, prediction))
                    NDCG.append(metrics.ndcg(label, prediction))
            except tf.errors.OutOfRangeError:
                hr = np.array(HR).mean()
                mrr = np.array(MRR).mean()
                ndcg = np.array(NDCG).mean()
                print("Epoch %d testing " % epoch + "Took: " +
                      time.strftime("%H: %M: %S",
                                    time.gmtime(time.time() - start_time)))
                print("HR is %.3f, MRR is %.3f, NDCG is %.3f" % (hr, mrr, ndcg))
            total_time += time.time() - start_time
            epochs_run = epoch + 1

        total_recommendations = count * FLAGS.batch_size
        # Guard the division so a run with no measurable time (e.g. zero
        # epochs) reports 0.00 instead of raising ZeroDivisionError.
        if total_time > 0:
            throughput = float(total_recommendations) / float(total_time)
        else:
            throughput = 0.0

        print("Total Epochs: %d on inference " % epochs_run)
        print("Total recommendations: %d" % total_recommendations)
        print("Approximate accelerator time in seconds is: %.2f" % total_time)
        print("Approximate accelerator performance in recommendations/second is: %.2f"
              % throughput)
def test(model, sess, test_data, all_items_idx, user_bought):
    """Evaluate ``model`` on ``test_data`` and print mean HR, MRR and NDCG.

    The function first collects one embedding per sample yielded by
    ``test_data.get_all_test()`` (with ``model.test_first`` set to True),
    then, for every sample yielded by ``test_data.get_instance()``, asks the
    model for a ranking over those embeddings and builds a top-K list that
    skips items the user already bought (the queried item itself is always
    allowed).

    Args:
        model: Object exposing ``is_training``, ``test_first`` and
            ``step(sess, sample, all_items_embed, _)``.
        sess: Session object handed through to ``model.step``.
        test_data: Object exposing ``get_all_test()`` and ``get_instance()``.
        all_items_idx: Array indexed with the model's returned indices to
            obtain item identifiers (must support ``[...].tolist()``).
        user_bought: Mapping from reviewer id to the collection of items
            that reviewer already bought.
    """
    model.is_training = False
    model.test_first = True
    HR, MRR, NDCG = [], [], []

    ########################## GET ALL ITEM EMBEDDING ONCE ######################
    all_items_embed = []
    for sample in test_data.get_all_test():
        item_embed = model.step(sess, sample, None, None)
        all_items_embed.append(item_embed[0][0])

    model.test_first = False
    all_items_embed = np.array(all_items_embed)

    ########################## TEST FOR EACH USER QUERY PAIR #####################
    for sample in test_data.get_instance():
        item_indices = model.step(sess, sample, all_items_embed, None)[0]
        itemID = sample[3]
        reviewerID = sample[4]
        ranking_list = all_items_idx[item_indices].tolist()

        u_bought = user_bought[reviewerID] if reviewerID in user_bought else []

        top_idx = []
        # Candidates are taken from the END of the ranking (``pop()``), so the
        # model's indices are presumably ordered worst-to-best -- confirm.
        # The loop also stops when the ranking is exhausted: previously a user
        # whose purchases filtered out too many candidates triggered
        # ``IndexError: pop from empty list``.
        while ranking_list and len(top_idx) < FLAGS.topK:
            candidate_item = ranking_list.pop()
            # Skip items already bought by the user, except the target item.
            if candidate_item not in u_bought or candidate_item == itemID:
                top_idx.append(candidate_item)
        top_idx = np.array(top_idx)

        HR.append(metrics.hit(itemID, top_idx))
        MRR.append(metrics.mrr(itemID, top_idx))
        NDCG.append(metrics.ndcg(itemID, top_idx))

    hr = np.array(HR).mean()
    mrr = np.array(MRR).mean()
    ndcg = np.array(NDCG).mean()
    print("HR is %.3f, MRR is %.3f, NDCG is %.3f" % (hr, mrr, ndcg))
def train(train_data, test_data, n_user, n_item):
    """Train an NCF model, evaluating on ``test_data`` after every epoch.

    Parameters are restored from the checkpoint in ``FLAGS.model_dir`` when
    one exists; otherwise all variables are freshly initialized. After the
    last epoch the model is saved to ``FLAGS.model_dir/NCF.ckpt``.

    Args:
        train_data: Dataset used for training; also supplies the iterator's
            ``output_types`` / ``output_shapes``.
        test_data: Dataset evaluated after each training epoch.
        n_user: Forwarded to the ``NCF.NCF`` constructor.
        n_item: Forwarded to the ``NCF.NCF`` constructor.
    """
    with tf.Session() as sess:
        iterator = tf.data.Iterator.from_structure(
            train_data.output_types, train_data.output_shapes)
        model = NCF.NCF(
            FLAGS.embedding_size, n_user, n_item, FLAGS.lr, FLAGS.optim,
            FLAGS.initializer, FLAGS.loss_func, FLAGS.activation,
            FLAGS.regularizer, iterator, FLAGS.topK, FLAGS.dropout,
            is_training=True)
        model.build()

        # Load existing parameters if there are any, otherwise train from scratch.
        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
            # Load the model parameters.
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Creating model with fresh parameters.")
            sess.run(tf.global_variables_initializer())

        count = 0  # global step counter, passed to model.step while training
        # Train on the training set for FLAGS.epochs epochs.
        for epoch in range(FLAGS.epochs):
            # Point the iterator at the training set.
            sess.run(model.iterator.make_initializer(train_data))
            model.is_training = True
            model.get_data()
            start_time = time.time()
            try:
                # Run until the iterator is exhausted, i.e. one full pass
                # over the training data.
                while True:
                    model.step(sess, count)
                    count += 1
            except tf.errors.OutOfRangeError:
                # Report how long this training epoch took.
                print("Epoch %d training " % epoch + "Took: " +
                      time.strftime("%H: %M: %S",
                                    time.gmtime(time.time() - start_time)))

            # Point the iterator at the test set.
            sess.run(model.iterator.make_initializer(test_data))
            model.is_training = False
            model.get_data()
            start_time = time.time()
            HR, MRR, NDCG = [], [], []
            # NOTE: an extra ``model.step`` call used to sit here, outside the
            # ``try``. It consumed the first test batch without scoring it and
            # could raise an uncaught OutOfRangeError on an empty test set.
            try:
                # Run until the iterator is exhausted, i.e. one full pass
                # over the test data.
                while True:
                    pred_item, gt_item = model.step(sess, None)
                    # Within one test batch every entry refers to the same
                    # ground-truth item, so only the first one is used.
                    gt_item = int(gt_item[0])
                    HR.append(metrics.hit(gt_item, pred_item))
                    MRR.append(metrics.mrr(gt_item, pred_item))
                    NDCG.append(metrics.ndcg(gt_item, pred_item))
            except tf.errors.OutOfRangeError:
                # Average the per-batch metrics.
                hr = np.array(HR).mean()
                mrr = np.array(MRR).mean()
                ndcg = np.array(NDCG).mean()
                print("Epoch %d testing " % epoch + "Took: " +
                      time.strftime("%H: %M: %S",
                                    time.gmtime(time.time() - start_time)))
                print("HR is %.3f, MRR is %.3f, NDCG is %.3f" % (hr, mrr, ndcg))

        # Save the model parameters.
        checkpoint_path = os.path.join(FLAGS.model_dir, "NCF.ckpt")
        model.saver.save(sess, checkpoint_path)
def train(train_data, test_data, user_size, item_size):
    """Fit the NCF model and report ranking metrics after each epoch.

    A session with GPU memory growth enabled is opened, the model is built
    and either restored from ``FLAGS.model_dir`` or freshly initialized.
    Each of the ``FLAGS.epochs`` epochs runs one pass over ``train_data``
    followed by one pass over ``test_data``, printing timing and the mean
    HR / MRR / NDCG. The model is saved as ``NCF.ckpt`` in
    ``FLAGS.model_dir`` at the end.

    Args:
        train_data: Training dataset; also defines the iterator structure.
        test_data: Evaluation dataset.
        user_size: Forwarded to the ``NCF.NCF`` constructor.
        item_size: Forwarded to the ``NCF.NCF`` constructor.
    """
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    with tf.Session(config=session_config) as sess:
        # ---------------------------- create model ----------------------------
        iterator = tf.data.Iterator.from_structure(
            train_data.output_types, train_data.output_shapes)
        model = NCF.NCF(
            FLAGS.embedding_size, user_size, item_size, FLAGS.lr,
            FLAGS.optim, FLAGS.initializer, FLAGS.loss_func,
            FLAGS.activation, FLAGS.regularizer, iterator, FLAGS.topK,
            FLAGS.dropout, is_training=True)
        model.build()

        checkpoint_state = tf.train.get_checkpoint_state(FLAGS.model_dir)
        if not checkpoint_state:
            print("Creating model with fresh parameters.")
            sess.run(tf.global_variables_initializer())
        else:
            print("Reading model parameters from %s"
                  % checkpoint_state.model_checkpoint_path)
            model.saver.restore(sess, checkpoint_state.model_checkpoint_path)

        # ------------------------------ training ------------------------------
        global_step = 0
        for epoch in range(FLAGS.epochs):
            sess.run(model.iterator.make_initializer(train_data))
            model.is_training = True
            tic = time.time()
            try:
                # Exhausting the iterator is the only way out of this loop.
                while True:
                    model.step(sess, global_step)
                    global_step += 1
            except tf.errors.OutOfRangeError:
                elapsed = time.strftime("%H: %M: %S",
                                        time.gmtime(time.time() - tic))
                print("Epoch %d training " % epoch + "Took: " + elapsed)

            # ----------------------------- evaluation -----------------------------
            sess.run(model.iterator.make_initializer(test_data))
            model.is_training = False
            tic = time.time()
            hits, reciprocal_ranks, gains = [], [], []
            try:
                while True:
                    prediction, label = model.step(sess, None)
                    # Only the first label of the batch is used as the target.
                    target = int(label[0])
                    hits.append(metrics.hit(target, prediction))
                    reciprocal_ranks.append(metrics.mrr(target, prediction))
                    gains.append(metrics.ndcg(target, prediction))
            except tf.errors.OutOfRangeError:
                hr = np.array(hits).mean()
                mrr = np.array(reciprocal_ranks).mean()
                ndcg = np.array(gains).mean()
                elapsed = time.strftime("%H: %M: %S",
                                        time.gmtime(time.time() - tic))
                print("Epoch %d testing " % epoch + "Took: " + elapsed)
                print("HR is %.3f, MRR is %.3f, NDCG is %.3f" % (hr, mrr, ndcg))

        # ------------------------------ save model ------------------------------
        save_path = os.path.join(FLAGS.model_dir, "NCF.ckpt")
        model.saver.save(sess, save_path)
def train(train_data, test_data, user_size, item_size):
    """Train the NCF model and evaluate it on ``test_data`` every epoch.

    The model is restored from ``FLAGS.model_dir`` when a checkpoint state
    exists, otherwise its variables are initialized from scratch. Every one
    of the ``FLAGS.epochs`` epochs performs a full pass over ``train_data``
    and then a full pass over ``test_data``, printing the elapsed time and
    the mean HR / MRR / NDCG. Finally the model is written to
    ``FLAGS.model_dir/NCF.ckpt``.

    Args:
        train_data: Training dataset; its ``output_types`` and
            ``output_shapes`` also define the shared iterator.
        test_data: Dataset used for the per-epoch evaluation.
        user_size: Forwarded to the ``NCF.NCF`` constructor.
        item_size: Forwarded to the ``NCF.NCF`` constructor.
    """
    with tf.Session() as sess:
        iterator = tf.data.Iterator.from_structure(
            train_data.output_types, train_data.output_shapes)
        model = NCF.NCF(
            FLAGS.embedding_size, user_size, item_size, FLAGS.lr,
            FLAGS.optim, FLAGS.initializer, FLAGS.loss_func,
            FLAGS.activation, FLAGS.regularizer, iterator, FLAGS.topK,
            FLAGS.dropout, is_training=True)
        model.build()

        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        if ckpt:
            print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Creating model with fresh parameters.")
            sess.run(tf.global_variables_initializer())

        count = 0  # running step counter handed to model.step during training
        for epoch in range(FLAGS.epochs):
            ############################## TRAINING ##############################
            sess.run(model.iterator.make_initializer(train_data))
            model.is_training = True
            model.get_data()
            start_time = time.time()
            try:
                # Loop ends when the iterator runs dry (OutOfRangeError).
                while True:
                    model.step(sess, count)
                    count += 1
            except tf.errors.OutOfRangeError:
                print("Epoch %d training " % epoch + "Took: " +
                      time.strftime("%H: %M: %S",
                                    time.gmtime(time.time() - start_time)))

            ############################# EVALUATION #############################
            sess.run(model.iterator.make_initializer(test_data))
            model.is_training = False
            model.get_data()
            start_time = time.time()
            HR, MRR, NDCG = [], [], []
            # Every test batch is scored inside the loop below. The former
            # stand-alone ``model.step`` call ahead of the ``try`` silently
            # dropped the first batch from the metrics and was not protected
            # against OutOfRangeError on an empty test set.
            try:
                while True:
                    prediction, label = model.step(sess, None)
                    # Only the first label of the batch is used as the target
                    # (presumably the whole batch shares it -- confirm).
                    label = int(label[0])
                    HR.append(metrics.hit(label, prediction))
                    MRR.append(metrics.mrr(label, prediction))
                    NDCG.append(metrics.ndcg(label, prediction))
            except tf.errors.OutOfRangeError:
                hr = np.array(HR).mean()
                mrr = np.array(MRR).mean()
                ndcg = np.array(NDCG).mean()
                print("Epoch %d testing " % epoch + "Took: " +
                      time.strftime("%H: %M: %S",
                                    time.gmtime(time.time() - start_time)))
                print("HR is %.3f, MRR is %.3f, NDCG is %.3f" % (hr, mrr, ndcg))

        ################################## SAVE MODEL ################################
        checkpoint_path = os.path.join(FLAGS.model_dir, "NCF.ckpt")
        model.saver.save(sess, checkpoint_path)