def svd(train, test, learning_rate=0.0005, reg=0.02, dim=50, batch_size=1000):
    samples_per_batch = len(train) // batch_size

    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]],
                                        batch_size=batch_size)
    iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["rate"]],
                                        batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
                                           item_num=ITEM_NUM, dim=dim, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=learning_rate, reg=reg, device=DEVICE)

    pid = os.getpid()
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {} {} {}".format("pid", "epoch", "train_error", "val_error",
                                         "min_val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        min_test_err = float("inf")
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={user_batch: users,
                                                item_batch: items,
                                                rate_batch: rates})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                min_test_err = min(test_err, min_test_err)
                print("{:5d} {:3d} {:f} {:f} {:f} {:f}(s)".format(
                    pid, i // samples_per_batch, train_err, test_err,
                    min_test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
                sys.stdout.flush()
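# The loops above and below rely on two small helpers that are not defined in
# this section. A minimal sketch, assuming MovieLens-style ratings on a 1-5
# scale (the clipping bounds are an assumption):
import numpy as np
import tensorflow as tf

def clip(x):
    # Clamp predictions to the valid rating range (assumed 1.0-5.0).
    return np.clip(x, 1.0, 5.0)

def make_scalar_summary(name, val):
    # Wrap a Python float in a tf.Summary proto so FileWriter.add_summary accepts it.
    return tf.Summary(value=[tf.Summary.Value(tag=name, simple_value=val)])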
df_movies.iloc[0].title

# Network + train + test
samples_per_batch = len(df_train) // BATCH_SIZE

iter_train = dataio.ShuffleIterator([df_train["user"], df_train["item"], df_train["rate"]],
                                    batch_size=BATCH_SIZE)
iter_test = dataio.OneEpochIterator([df_test["user"], df_test["item"], df_test["rate"]],
                                    batch_size=-1)

user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
rate_batch = tf.placeholder(tf.float32, shape=[None])

infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
                                       item_num=ITEM_NUM, dim=DIM, device=DEVICE)
global_step = tf.contrib.framework.get_or_create_global_step()
_, train_op = ops.optimization(infer, regularizer, rate_batch,
                               learning_rate=0.001, reg=0.05, device=DEVICE)
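# ops.inference_svd is called the same way throughout this section. A sketch of
# the biased matrix-factorization graph it is assumed to build (global bias plus
# user/item biases plus a dot product of dim-sized embeddings); the node names
# "svd_inference"/"svd_regularizer" match the ones extracted into a sub-graph later:
def inference_svd(user_batch, item_batch, user_num, item_num, dim=5, device="/cpu:0"):
    with tf.device("/cpu:0"):
        bias_global = tf.get_variable("bias_global", shape=[])
        w_bias_user = tf.get_variable("embd_bias_user", shape=[user_num])
        w_bias_item = tf.get_variable("embd_bias_item", shape=[item_num])
        bias_user = tf.nn.embedding_lookup(w_bias_user, user_batch, name="bias_user")
        bias_item = tf.nn.embedding_lookup(w_bias_item, item_batch, name="bias_item")
        w_user = tf.get_variable("embd_user", shape=[user_num, dim],
                                 initializer=tf.truncated_normal_initializer(stddev=0.02))
        w_item = tf.get_variable("embd_item", shape=[item_num, dim],
                                 initializer=tf.truncated_normal_initializer(stddev=0.02))
        embd_user = tf.nn.embedding_lookup(w_user, user_batch, name="embedding_user")
        embd_item = tf.nn.embedding_lookup(w_item, item_batch, name="embedding_item")
    with tf.device(device):
        # prediction: global bias + user bias + item bias + <p_u, q_i>
        infer = tf.reduce_sum(tf.multiply(embd_user, embd_item), 1)
        infer = tf.add(infer, bias_global)
        infer = tf.add(infer, bias_user)
        infer = tf.add(infer, bias_item, name="svd_inference")
        # L2 penalty on the embeddings, scaled by `reg` inside ops.optimization
        regularizer = tf.add(tf.nn.l2_loss(embd_user), tf.nn.l2_loss(embd_item),
                             name="svd_regularizer")
    return infer, regularizer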
def svd(train, test):
    samples_per_batch = len(train) // BATCH_SIZE

    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]],
                                        batch_size=BATCH_SIZE)
    iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["rate"]],
                                        batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    item_content_batch = tf.placeholder(tf.float32, shape=[None, ITEM_CONTENT_DIM], name="content_item")
    user_content_batch = tf.placeholder(tf.float32, shape=[None, USER_CONTENT_DIM], name="content_user")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = cdl.inference(user_batch, user_content_batch, item_batch, item_content_batch,
                                       user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM,
                                       item_autoencoder_input_dim=ITEM_CONTENT_DIM,
                                       item_autoencoder_hidden_dims=[50, DIM, 50],
                                       user_autoencoder_input_dim=USER_CONTENT_DIM,
                                       user_autoencoder_hidden_dims=[30, DIM, 30],
                                       device="/gpu:0")
    # infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
    #                                        item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            # random placeholders standing in for real item/user content features
            items_content = np.random.randn(BATCH_SIZE, ITEM_CONTENT_DIM).astype(np.float32)
            user_content = np.random.randn(BATCH_SIZE, USER_CONTENT_DIM).astype(np.float32)
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={user_batch: users,
                                                item_batch: items,
                                                rate_batch: rates,
                                                item_content_batch: items_content,
                                                user_content_batch: user_content})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    items_content = np.random.randn(len(users), ITEM_CONTENT_DIM).astype(np.float32)
                    user_content = np.random.randn(len(users), USER_CONTENT_DIM).astype(np.float32)
                    pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items,
                                                            item_content_batch: items_content,
                                                            user_content_batch: user_content})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch, train_err,
                                                       test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
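# The np.random.randn calls above are stand-ins for real side information. A
# hypothetical way to feed actual content: precompute one feature matrix per
# entity and slice rows per batch, the same way feedback_u is sliced in the
# early-stopping variant further below (item_features/user_features are
# illustrative names, not part of the original code):
item_features = np.random.randn(ITEM_NUM, ITEM_CONTENT_DIM).astype(np.float32)  # replace with real features, e.g. TF-IDF
user_features = np.random.randn(USER_NUM, USER_CONTENT_DIM).astype(np.float32)
items_content = item_features[items.astype(int), :]
user_content = user_features[users.astype(int), :]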
# requires: from tensorflow.python.framework import graph_util
def svd(train, test):
    samples_per_batch = len(train) // BATCH_SIZE

    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]],
                                        batch_size=BATCH_SIZE)
    iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["rate"]],
                                        batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
                                           item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.15, reg=0.05, device=DEVICE)

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users,
                                                                   item_batch: items,
                                                                   rate_batch: rates})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch, train_err,
                                                       np.sqrt(np.mean(test_err2)), end - start))
                start = end
        output_graph_def = graph_util.extract_sub_graph(sess.graph.as_graph_def(),
                                                        ["svd_inference", "svd_regularizer"])
        tf.summary.FileWriter(logdir="/tmp/svd", graph_def=output_graph_def)
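# Every variant batches through dataio.ShuffleIterator (endless uniform random
# batches) and dataio.OneEpochIterator (one sequential pass; batch_size=-1
# yields the whole set in a single batch). A sketch of how the two classes are
# assumed to behave, consistent with the next()/for-loop usage above:
class ShuffleIterator(object):
    """Endless iterator over random batches of the given columns."""
    def __init__(self, inputs, batch_size=10):
        self.num_cols = len(inputs)
        self.len = len(inputs[0])
        self.batch_size = batch_size
        # stack the columns into one [len, num_cols] array
        self.inputs = np.transpose(np.vstack([np.array(inputs[i]) for i in range(self.num_cols)]))

    def __len__(self):
        return self.len

    def __iter__(self):
        return self

    def __next__(self):
        # sample a random batch with replacement
        ids = np.random.randint(0, self.len, (self.batch_size,))
        out = self.inputs[ids, :]
        return [out[:, i] for i in range(self.num_cols)]


class OneEpochIterator(ShuffleIterator):
    """Sequential iterator that raises StopIteration after one pass."""
    def __init__(self, inputs, batch_size=10):
        super(OneEpochIterator, self).__init__(inputs, batch_size=batch_size)
        if batch_size > 0:
            self.idx_group = np.array_split(np.arange(self.len),
                                            int(np.ceil(self.len / float(batch_size))))
        else:
            self.idx_group = [np.arange(self.len)]
        self.group_id = 0

    def __next__(self):
        if self.group_id >= len(self.idx_group):
            self.group_id = 0
            raise StopIteration
        out = self.inputs[self.idx_group[self.group_id], :]
        self.group_id += 1
        return [out[:, i] for i in range(self.num_cols)]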
def svd(X_train, X_test, feedback_u, DIM, LAMBDA):
    """Main SVD code."""
    # learning rate
    learning = LR
    # number of batches in the training data
    samples_per_batch = len(X_train) // BATCH_SIZE

    # initialize early-stopping parameters
    min_err = 100  # store the minimum validation error
    counter = 0    # count how many epochs the validation error stayed above the minimum

    # build iterator objects for the train and validation sets
    iter_train = dataio.ShuffleIterator([X_train["user"], X_train["item"], X_train["rate"]],
                                        batch_size=BATCH_SIZE)
    iter_val = dataio.OneEpochIterator([X_test["user"], X_test["item"], X_test["rate"]],
                                       batch_size=BATCH_SIZE)
    '''iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["rate"]],
                                           batch_size=BATCH_SIZE)'''

    # start TensorFlow with an empty graph (needed when calling the svd function
    # multiple times, e.g. during k-fold validation)
    with tf.Graph().as_default():
        # define tensor placeholders (fed at run time)
        user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
        item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
        rate_batch = tf.placeholder(tf.float32, shape=[None])
        feedback_batch = tf.placeholder(tf.float32, shape=[None, ITEM_NUM])
        feedback_mat = tf.placeholder(tf.float32, shape=[USER_NUM, ITEM_NUM])

        infer, regularizer = ops.inference_svd(user_batch, item_batch, feedback_batch,
                                               user_num=USER_NUM, item_num=ITEM_NUM,
                                               dim=DIM, device=DEVICE)
        _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                       learning_rate=LR, reg=LAMBDA, device=DEVICE)
        full_ratings = ops.get_pred(feedback_mat, ITEM_NUM, USER_NUM, DIM, DEVICE)

        # initializer for all variables
        init_op = tf.global_variables_initializer()

        # start the TensorFlow session
        with tf.Session() as sess:
            # initialize variables
            sess.run(init_op)
            print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
            errors = deque(maxlen=samples_per_batch)
            # time each epoch
            start = time.time()
            # iterate through epochs
            for i in range(EPOCH_MAX * samples_per_batch):
                # generate batch data
                users, items, rates = next(iter_train)
                feedback = feedback_u[users.astype('int'), :]
                # run the training step
                _, pred_batch = sess.run([train_op, infer],
                                         feed_dict={user_batch: users,
                                                    item_batch: items,
                                                    rate_batch: rates,
                                                    feedback_batch: feedback})
                pred_batch = clip(pred_batch)
                errors.append(np.power(pred_batch - rates, 2))
                # predict on the validation set at the end of each epoch
                if i % samples_per_batch == 0:
                    train_err = np.sqrt(np.mean(errors))  # train RMSE
                    test_err2 = np.array([])              # squared validation errors
                    # predict the validation set using the iterator
                    for users, items, rates in iter_val:
                        feedback = feedback_u[users.astype('int'), :]
                        pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                                item_batch: items,
                                                                feedback_batch: feedback})
                        pred_batch = clip(pred_batch)
                        test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                    end = time.time()  # end timer
                    # validation error
                    RMSE_val = np.sqrt(np.mean(test_err2))
                    print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                           train_err, RMSE_val, end - start))
                    start = end  # reset clock
                    # early-stopping check: update the minimum-error variable if needed;
                    # if it has not improved for 100 epochs, stop training
                    if min_err > RMSE_val:
                        min_err = RMSE_val
                        counter = 0
                        print('Min error updated')
                    else:
                        counter += 1
                        if counter >= 100:
                            break
            # output log information
            output_graph_def = graph_util.extract_sub_graph(sess.graph.as_graph_def(),
                                                            ["svd_inference", "svd_regularizer"])
            tf.summary.FileWriter(logdir="/tmp/svd", graph_def=output_graph_def)
            ratings_mat = sess.run(full_ratings, feed_dict={feedback_mat: feedback_u})
    return min_err, clip(ratings_mat)
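# A hypothetical driver for the early-stopping variant above; X_train/X_val are
# assumed to be dataframes with integer "user"/"item" columns and a "rate"
# column, and feedback_u marks which items each user has rated (the DIM/LAMBDA
# values are illustrative):
feedback_u = np.zeros((USER_NUM, ITEM_NUM), dtype=np.float32)
feedback_u[X_train["user"].astype(int), X_train["item"].astype(int)] = 1.0
min_err, ratings_mat = svd(X_train, X_val, feedback_u, DIM=15, LAMBDA=0.05)
print("best validation RMSE: {:f}".format(min_err))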
def svd(train, test):
    # number of batches per epoch, from the training-set length
    samples_per_batch = len(train) // BATCH_SIZE

    # turn the column data into row tuples, then shuffle them randomly
    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]],
                                        batch_size=BATCH_SIZE)
    print(iter_train)
    iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["rate"]],
                                        batch_size=-1)
    print(iter_test)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
                                           item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)

    # initialize variables
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        # set up the log directory; it must match the logdir passed to TensorBoard
        summary_writer = tf.summary.FileWriter(logdir="/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users,
                                                                   item_batch: items,
                                                                   rate_batch: rates})
            pred_batch = clip(pred_batch)
            # np.power: squared error of the batch predictions
            errors.append(np.power(pred_batch - rates, 2))
            # report progress at the end of each epoch
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch, train_err,
                                                       test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
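# To view the training/test error curves written by the FileWriter above, point
# TensorBoard at the same directory:
#   tensorboard --logdir=/log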
def svd(train, test):
    nb_batches = len(train) // BATCH_SIZE

    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["outcome"],
                                         train["wins"], train["fails"]],
                                        batch_size=BATCH_SIZE)
    iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["outcome"],
                                         test["wins"], test["fails"]],
                                        batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    wins_batch = tf.placeholder(tf.float32, shape=[None], name="nb_wins")
    fails_batch = tf.placeholder(tf.float32, shape=[None], name="nb_fails")

    # Ordinal (cumulative-logits) variant, kept for reference; the NB_CLASSES > 2
    # branches below depend on it, since they use `cost` and `logits_cdf`:
    # infer, logits, logits_cdf, logits_pdf, regularizer, user_bias, user_features, \
    #     item_bias, item_features, thresholds = ops.inference_svd(
    #         user_batch, item_batch, wins_batch, fails_batch,
    #         user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    infer, logits, regularizer, user_bias, user_features, item_bias, item_features = \
        ops.inference_svd(user_batch, item_batch, wins_batch, fails_batch,
                          user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    global_step = tf.train.get_or_create_global_step()
    # cost_l2, train_op = ops.optimization(infer, regularizer, rate_batch,
    #                                      learning_rate=LEARNING_RATE, reg=LAMBDA_REG,
    #                                      device=DEVICE)  # needed by the non-DISCRETE branches
    cost_nll, train_op = ops.optimization(infer, logits, regularizer, rate_batch,
                                          learning_rate=LEARNING_RATE, reg=LAMBDA_REG,
                                          device=DEVICE)
    # cost, train_op = ops.optimization(infer, logits, logits_cdf, logits_pdf, regularizer,
    #                                   rate_batch, learning_rate=LEARNING_RATE,
    #                                   reg=LAMBDA_REG, device=DEVICE)

    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        train_se = deque(maxlen=nb_batches)
        train_nll = deque(maxlen=nb_batches)
        train_cost = deque(maxlen=nb_batches)
        train_acc = deque(maxlen=nb_batches)
        train_obo = deque(maxlen=nb_batches)
        train_auc = deque(maxlen=nb_batches)
        start = time.time()
        for i in range(EPOCH_MAX * nb_batches):
            train_users, train_items, train_rates, train_wins, train_fails = next(iter_train)
            batch_size = len(train_rates)
            _, train_logits, train_infer = sess.run([train_op, logits, infer],
                                                    feed_dict={user_batch: train_users,
                                                               item_batch: train_items,
                                                               rate_batch: train_rates,
                                                               wins_batch: train_wins,
                                                               fails_batch: train_fails})
            if DISCRETE:
                if NB_CLASSES > 2:
                    # requires the ordinal variant above (`cost`, `logits_cdf`)
                    cost_batch = sess.run(cost, feed_dict={rate_batch: train_rates,
                                                           item_batch: train_items,
                                                           user_batch: train_users,
                                                           logits_cdf: train_logits_cdf})
                    train_cost.append(cost_batch)
                    train_acc.append(train_infer == train_rates)
                    train_obo.append(abs(train_infer - train_rates) <= 1)
                    train_se.append(np.power(train_infer - train_rates, 2))
                else:
                    nll_batch = sess.run(cost_nll, feed_dict={rate_batch: train_rates,
                                                              logits: train_logits})
                    proba_batch = ops.sigmoid(train_logits)
                    train_acc.append(np.round(proba_batch) == train_rates)
                    train_auc.append(roc_auc_score(train_rates, proba_batch))
                    train_nll.append(nll_batch)
            else:
                # requires the commented-out L2 variant above (`cost_l2`)
                l2_batch = sess.run(cost_l2, feed_dict={rate_batch: train_rates,
                                                        infer: train_infer})
                train_se.append(np.power(train_rates - train_infer, 2))
            if i % nb_batches == 0:
                # compute train metrics over the last epoch, then evaluate on the test set
                train_rmse = np.sqrt(np.mean(train_se))
                train_macc = np.mean(train_acc)
                train_mobo = np.mean(train_obo)
                train_mauc = np.mean(train_auc)
                train_mnll = np.mean(train_nll) / BATCH_SIZE
                train_mcost = np.mean(train_cost)
                test_se = []
                test_acc = []
                test_obo = []
                test_auc = 0
                test_nll = []
                test_cost = []
                for test_users, test_items, test_rates, test_wins, test_fails in iter_test:
                    test_logits, test_infer = sess.run([logits, infer],
                                                       feed_dict={user_batch: test_users,
                                                                  item_batch: test_items,
                                                                  wins_batch: test_wins,
                                                                  fails_batch: test_fails})
                    test_size = len(test_rates)
                    if DISCRETE:
                        if NB_CLASSES > 2:
                            # requires the ordinal variant (`cost`)
                            cost_batch = sess.run(cost, feed_dict={rate_batch: test_rates,
                                                                   item_batch: test_items,
                                                                   user_batch: test_users})
                            test_cost.append(cost_batch)
                            test_acc.append(test_infer == test_rates)
                            test_obo.append(abs(test_infer - test_rates) <= 1)
                            test_se.append(np.power(test_infer - test_rates, 2))
                        else:
                            nll_batch = sess.run(cost_nll, feed_dict={rate_batch: test_rates,
                                                                      logits: test_logits})
                            proba_batch = ops.sigmoid(test_logits)
                            test_acc.append(np.round(proba_batch) == test_rates)
                            test_auc = roc_auc_score(test_rates, proba_batch)
                            test_nll.append(nll_batch)
                    else:
                        # requires `cost_l2`
                        l2_batch = sess.run(cost_l2, feed_dict={rate_batch: test_rates,
                                                                infer: test_infer})
                        test_se.append(np.power(test_rates - test_infer, 2))
                end = time.time()
                test_rmse = np.sqrt(np.mean(test_se))
                test_macc = np.mean(test_acc)
                test_mobo = np.mean(test_obo)
                test_mnll = np.mean(test_nll) / len(test)
                test_mcost = np.mean(test_cost)
                if DISCRETE:
                    if NB_CLASSES > 2:
                        print("{:3d} TRAIN(size={:d}/{:d}, macc={:f}, mobo={:f}, rmse={:f}, mcost={:f}) "
                              "TEST(size={:d}, macc={:f}, mobo={:f}, rmse={:f}, mcost={:f}) {:f}(s)"
                              .format(i // nb_batches, len(train_users), len(train),
                                      train_macc, train_mobo, train_rmse, train_mcost,
                                      len(test), test_macc, test_mobo, test_rmse,
                                      test_mcost, end - start))
                    else:
                        print("{:3d} TRAIN(size={:d}/{:d}, macc={:f}, mauc={:f}, mnll={:f}) "
                              "TEST(size={:d}, macc={:f}, auc={:f}, mnll={:f}) {:f}(s)"
                              .format(i // nb_batches, len(train_users), len(train),
                                      train_macc, train_mauc, train_mnll,
                                      len(test), test_macc, test_auc, test_mnll,
                                      end - start))
                else:
                    print("{:3d} TRAIN(size={:d}/{:d}, rmse={:f}) TEST(size={:d}, rmse={:f}) {:f}(s)"
                          .format(i // nb_batches, len(train_users), len(train),
                                  train_rmse, len(test), test_rmse, end - start))
                train_err_summary = make_scalar_summary("training_error", train_rmse)
                test_err_summary = make_scalar_summary("test_error", test_rmse)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
        # save the model
        print(os.path.join(BASE_DIR, 'fm.ckpt'))
        saver.save(sess, os.path.join(BASE_DIR, 'fm.ckpt'))
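# The checkpoint saved above can be reloaded for inference, assuming the same
# graph has first been rebuilt in the current default graph:
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, os.path.join(BASE_DIR, 'fm.ckpt'))
    # then run `infer` with a feed_dict, as in the evaluation loop above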
def timesvdplusplus(train, test, binsize, ut_mean, maxtime):
    # note: unlike the plain SVD variants, this uses BATCH_SIZE iterations per epoch
    samples_per_batch = BATCH_SIZE

    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"], train["st"]],
                                        batch_size=BATCH_SIZE)
    iter_test = dataio.ShuffleIterator([test["user"], test["item"], test["rate"], test["st"]],
                                       batch_size=BATCH_SIZE)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    time_batch = tf.placeholder(tf.int32, shape=[None])
    rmat_batch = tf.placeholder(tf.float32, shape=[USER_NUM, ITEM_NUM], name="rmat")
    tu_batch = tf.placeholder(tf.int32, shape=[None])

    # infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
    #                                        item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    infer, regularizer = ops.inference_timesvdplusplus(user_batch, item_batch, time_batch,
                                                       rmat_batch, tu_batch, binsize, maxtime,
                                                       user_num=USER_NUM, item_num=ITEM_NUM,
                                                       batch_size=BATCH_SIZE, dim=DIM)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates, times = next(iter_train)
            # binary implicit-feedback matrix for the users in this batch
            rmat = np.zeros([USER_NUM, ITEM_NUM], dtype=np.float32)
            rmat[users, items] = 1.0
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={user_batch: users,
                                                item_batch: items,
                                                rate_batch: rates,
                                                time_batch: times,
                                                rmat_batch: rmat,
                                                tu_batch: ut_mean[users]})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                # evaluate on a single random test batch
                users, items, rates, times = next(iter_test)
                rmat = np.zeros([USER_NUM, ITEM_NUM], dtype=np.float32)
                rmat[users, items] = 1.0
                pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                        item_batch: items,
                                                        time_batch: times,
                                                        rmat_batch: rmat,
                                                        tu_batch: ut_mean[users]})
                pred_batch = clip(pred_batch)
                test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch, train_err,
                                                       test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
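# Hypothetical preprocessing for the timeSVD++ inputs above, assuming `train`
# is a dataframe whose "st" column holds a per-rating day index: ut_mean is
# each user's mean rating time and maxtime/binsize define the item time bins.
ut_mean = (train.groupby("user")["st"].mean()
                .reindex(np.arange(USER_NUM), fill_value=0)
                .values.astype(np.int32))
maxtime = int(train["st"].max())
binsize = maxtime // 30 + 1  # roughly 30 item-bias time bins (an arbitrary choice)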
def svdplusplus(train, test):
    # note: unlike the plain SVD variants, this uses BATCH_SIZE iterations per epoch
    samples_per_batch = BATCH_SIZE

    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]],
                                        batch_size=BATCH_SIZE)
    iter_test = dataio.ShuffleIterator([test["user"], test["item"], test["rate"]],
                                       batch_size=BATCH_SIZE)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    rmat_batch = tf.placeholder(tf.float32, shape=[USER_NUM, ITEM_NUM], name="rmat")
    onecount_sqrt_batch = tf.placeholder(tf.float32, shape=[USER_NUM], name="onecount_sqrt")

    # infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
    #                                        item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    infer, regularizer = ops.inference_svdplusplus(user_batch, item_batch, rmat_batch,
                                                   user_num=USER_NUM, item_num=ITEM_NUM,
                                                   batch_size=BATCH_SIZE, dim=DIM)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            # dense ratings matrix from the training dataframe (this depends only
            # on `train`, so it could be built once before the loop instead of
            # once per batch)
            rvalues = train.values
            rmat = np.zeros((USER_NUM, ITEM_NUM), dtype=float)
            rows, row_pos = np.unique(rvalues[:, 1], return_inverse=True)
            cols, col_pos = np.unique(rvalues[:, 2], return_inverse=True)
            rmat[row_pos, col_pos] = rvalues[:, 3]
            # rmat = rmat[users[:, None], items]
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={user_batch: users,
                                                item_batch: items,
                                                rate_batch: rates,
                                                rmat_batch: rmat})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                # evaluate on a single random test batch
                users, items, rates = next(iter_test)
                pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                        item_batch: items,
                                                        rmat_batch: rmat})
                pred_batch = clip(pred_batch)
                test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch, train_err,
                                                       test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
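# For reference, the prediction rule that ops.inference_svdplusplus is assumed
# to implement is Koren's SVD++ (KDD 2008):
#
#   r_hat(u, i) = mu + b_u + b_i + q_i . (p_u + |N(u)|^(-1/2) * sum_{j in N(u)} y_j)
#
# where N(u) is the set of items user u has interacted with (the nonzero
# columns of row u in rmat), and the onecount_sqrt placeholder above would hold
# the per-user |N(u)|^(1/2) normalizers.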