def svd(train, test, learning_rate=0.0005, reg=0.02, dim=50, batch_size=1000):
    samples_per_batch = len(train) // batch_size
    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]],
                                        batch_size=batch_size)
    iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["rate"]],
                                        batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
                                           item_num=ITEM_NUM, dim=dim, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=learning_rate, reg=reg, device=DEVICE)
    pid = int(os.getpid())
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        min_test_err = 9999
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={user_batch: users,
                                                item_batch: items,
                                                rate_batch: rates})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                min_test_err = min(test_err, min_test_err)
                print("{:5d} {:3d} {:f} {:f} {:f} {:f}(s)".format(
                    pid, i // samples_per_batch, train_err, test_err,
                    min_test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
                sys.stdout.flush()
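# Every variant in this file calls two small helpers, clip() and
# make_scalar_summary(), without defining them. A minimal sketch, assuming
# MovieLens-style ratings on a 1-5 scale and the TF1 Summary protobuf API;
# adjust the clip range to the rating scale of your data.
import numpy as np
import tensorflow as tf

def clip(x):
    # Clamp predictions to the valid rating range.
    return np.clip(x, 1.0, 5.0)

def make_scalar_summary(name, val):
    # Wrap a Python float in a Summary proto so FileWriter.add_summary() accepts it.
    return tf.Summary(value=[tf.Summary.Value(tag=name, simple_value=val)])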
def svd(train, test):
    samples_per_batch = len(train) // BATCH_SIZE
    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]],
                                        batch_size=BATCH_SIZE)
    iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["rate"]],
                                        batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
                                           item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={user_batch: users,
                                                item_batch: items,
                                                rate_batch: rates})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
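# A minimal driver sketch for the function above. The loader name
# (dataio.read_process) and the 90/10 shuffle-split mirror the common
# TF-recomm setup, but both are assumptions here; point the path at your
# own ratings file.
if __name__ == "__main__":
    df = dataio.read_process("/tmp/movielens/ml-1m/ratings.dat", sep="::")
    rows = len(df)
    df = df.iloc[np.random.permutation(rows)].reset_index(drop=True)
    split_index = int(rows * 0.9)
    df_train = df[0:split_index]
    df_test = df[split_index:].reset_index(drop=True)
    svd(df_train, df_test)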
def svd(train, test):
    samples_per_batch = len(train) // BATCH_SIZE
    print(test.head(10))
    iter_train = dataio.ShuffleIterator(
        [train["user"], train["days_since_prior_order"], train["basket_size"]],
        batch_size=BATCH_SIZE)
    iter_test = dataio.OneEpochIterator(
        [test["user"], test["days_since_prior_order"], test["basket_size"]],
        batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    days_since_prior_order_batch = tf.placeholder(
        tf.int32, shape=[None], name="id_days_since_prior_order")
    basket_size_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, days_since_prior_order_batch,
                                           user_num=USER_NUM, item_num=ITEM_NUM,
                                           dim=DIM, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, basket_size_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        min_train_err = 100  # best training RMSE so far (was `min`, which shadowed the builtin)
        finalpr = []
        finalac = []
        for i in range(EPOCH_MAX * samples_per_batch):
            users, days_since_prior_orders, basket_sizes = next(iter_train)
            _, pred_batch = sess.run(
                [train_op, infer],
                feed_dict={user_batch: users,
                           days_since_prior_order_batch: days_since_prior_orders,
                           basket_size_batch: basket_sizes})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - basket_sizes, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, days_since_prior_orders, basket_sizes in iter_test:
                    pred_batch = sess.run(
                        infer,
                        feed_dict={user_batch: users,
                                   days_since_prior_order_batch: days_since_prior_orders})
                    # pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2,
                                          np.power(pred_batch - basket_sizes, 2))
                    pr = pred_batch
                    ac = basket_sizes
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
                # Keep the test predictions/actuals from the epoch with the
                # lowest training error seen so far.
                if train_err < min_train_err:
                    min_train_err = train_err
                    finalpr = pr
                    finalac = ac
        return finalpr, finalac
# LEARNING_RATE = 0.1  # commented out in the original; LEARNING_RATE must be defined elsewhere
EPOCH_MAX = 100
LAMBDA_REG = 0.1
LOG_STEP = 101

user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
rate_batch = tf.placeholder(tf.float32, shape=[None])
wins_batch = tf.placeholder(tf.float32, shape=[None], name="nb_wins")
fails_batch = tf.placeholder(tf.float32, shape=[None], name="nb_fails")
infer, logits, logits_cdf, logits_pdf, regularizer, user_bias, user_features, \
    item_bias, item_features, thresholds = ops.inference_svd(
        user_batch, item_batch, wins_batch, fails_batch,
        user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
global_step = tf.train.get_or_create_global_step()
# Attention: only var_list = embd_user, bias_user
cost, auc, update_op, train_op = ops.optimization(
    infer, logits, logits_cdf, logits_pdf, regularizer, rate_batch,
    learning_rate=LEARNING_RATE, reg=LAMBDA_REG, device=DEVICE,
    var_list=[user_bias, user_features])

df_train, _, df_test = dataio.get_data()

saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, os.path.join(BASE_DIR, "fm.ckpt"))
    all_user_features = sess.run(user_features,
                                 feed_dict={user_batch: range(USER_NUM)})
    all_user_features_norms = np.diag(all_user_features.dot(all_user_features.T))
    all_user_bias = sess.run(user_bias, feed_dict={user_batch: range(USER_NUM)})
    # print('all_features', all_user_features.min(), 'to', all_user_features.max())
    # print('all_features_norms', all_user_features_norms.min(), 'to', all_user_features_norms.max())
    # print('all_bias', all_user_bias.min(), 'to', all_user_bias.max())
    # print('item_features', all_user_bias.min(), 'to', all_user_bias.max())
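# Side note: np.diag(A.dot(A.T)) above materializes a full USER_NUM x USER_NUM
# matrix only to keep its diagonal. A sketch of an equivalent computation of
# the squared row norms that avoids the quadratic intermediate:
all_user_features_norms = np.einsum("ij,ij->i", all_user_features, all_user_features)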
def svd(train, test, length, moviefile, trainFl=False):
    print("Movies file length:")
    print(len(moviefile))
    samples_per_batch = len(train) // BATCH_SIZE
    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]],
                                        batch_size=BATCH_SIZE)
    iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["rate"]],
                                        batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
                                           item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)
    # zeros = tf.Variable(tf.zeros([1]), name="zeros")
    init_op = tf.global_variables_initializer()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="./tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        if trainFl:
            for i in range(EPOCH_MAX * samples_per_batch):
                users, items, rates = next(iter_train)
                _, pred_batch = sess.run([train_op, infer],
                                         feed_dict={user_batch: users,
                                                    item_batch: items,
                                                    rate_batch: rates})
                pred_batch = clip(pred_batch)
                errors.append(np.power(pred_batch - rates, 2))
                if i % samples_per_batch == 0:
                    train_err = np.sqrt(np.mean(errors))
                    test_err2 = np.array([])
                    for users, items, rates in iter_test:
                        pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                                item_batch: items})
                        pred_batch = clip(pred_batch)
                        test_err2 = np.append(test_err2,
                                              np.power(pred_batch - rates, 2))
                    end = time.time()
                    test_err = np.sqrt(np.mean(test_err2))
                    print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                           train_err, test_err,
                                                           end - start))
                    train_err_summary = make_scalar_summary("training_error", train_err)
                    test_err_summary = make_scalar_summary("test_error", test_err)
                    summary_writer.add_summary(train_err_summary, i)
                    summary_writer.add_summary(test_err_summary, i)
                    start = end
            # meta_graph_def = tf.train.export_meta_graph(filename='/tmp/tfrecomm.meta')
            save_path = saver.save(sess, "./tmp/")
        else:
            saver.restore(sess, "./tmp/")
            # print("Model saved in file: %s" % save_path)

        # The socket-server setup below was disabled in the original:
        # sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # server_address = ('0.0.0.0', 81)
        # print('starting up on %s port %s' % server_address, file=sys.stderr)
        # sock.bind(server_address)
        # sock.listen(1)

        # Score every movie for a probe user and print the top 10.
        movies = list(range(len(moviefile)))
        users = [1]
        pred_batch = sess.run(infer, feed_dict={user_batch: users, item_batch: movies})
        moviesrecomm = list(zip(movies, pred_batch))
        smovies = sorted(moviesrecomm, key=lambda x: x[1], reverse=True)
        print(" Top Movies ------------------------------------------------------------")
        topmovies = smovies[0:10]
        print(topmovies)
        # give number between 1 - 5000
        data = 3
        del users[:]
        users.append(int(data))
        print(users)
        pred_batch = sess.run(infer, feed_dict={user_batch: users, item_batch: movies})
        moviesrecomm = list(zip(movies, pred_batch))
        smovies = sorted(moviesrecomm, key=lambda x: x[1], reverse=True)
        topmovies = smovies[0:10]
        print(topmovies)
        for item in topmovies:
            itopmovie = item[0]
            recommendedmovie = moviefile["title"][itopmovie]
            recommendedtags = moviefile["tags"][itopmovie]
            # print('sending data back to the client', file=sys.stderr)
            # connection.sendall(recommendedmovie + ":" + recommendedtags + "\n")
            # print('Sent data', file=sys.stderr)
        return

        # NOTE: everything below is unreachable because of the return above.
        # It is the disabled socket-server loop, kept as in the original but
        # ported from Python 2 print statements to Python 3.
        while True:
            # Wait for a connection
            print('waiting for a connection', file=sys.stderr)
            connection, client_address = sock.accept()
            try:
                print('connection from', client_address, file=sys.stderr)
                # Receive the data in small chunks and retransmit it
                while True:
                    data = connection.recv(16)
                    print('received "%s"' % data, file=sys.stderr)
                    if data:
                        del users[:]
                        try:
                            user = int(data.decode())  # recv() returns bytes in Python 3
                        except ValueError:
                            break
                        users.append(user)
                        print(users)
                        pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                                item_batch: movies})
                        moviesrecomm = list(zip(movies, pred_batch))
                        smovies = sorted(moviesrecomm, key=lambda x: x[1], reverse=True)
                        topmovies = smovies[0:10]
                        print(topmovies)
                        for item in topmovies:
                            itopmovie = item[0]
                            recommendedmovie = moviefile["title"][itopmovie]
                            recommendedtags = moviefile["tags"][itopmovie]
                            connection.sendall(
                                (recommendedmovie + ":" + recommendedtags + "\n").encode())
                    else:
                        print('no more data from', client_address, file=sys.stderr)
                        break
            finally:
                connection.close()
def svd(train, test):
    samples_per_batch = len(train) // batch_size
    iter_train = data.ShuffleIterator([train["user"], train["item"], train["rate"]],
                                      batch_size=batch_size)
    iter_test = data.OneEpochIterator([test["user"], test["item"], test["rate"]],
                                      batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    # float32, not int32: the loss subtracts ratings from float predictions.
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=user_num,
                                           item_num=item_num, dim=dim, device=device)
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=device)
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(epoch_max * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={user_batch: users,
                                                item_batch: items,
                                                rate_batch: rates})
            pred_batch = clip(pred_batch)  # was `pred_barch`, an undefined name
            # Squared error; the exponent was mistyped as 200 in the original.
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer,
                                          feed_dict={user_batch: users,  # was `user`, undefined
                                                     item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
def svd(train, test, total):
    samples_per_batch = len(train) // BATCH_SIZE
    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]],
                                        batch_size=BATCH_SIZE)
    iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["rate"]],
                                        batch_size=-1)
    iter_totaltest = dataio.OneEpochIterator([total["user"], total["item"], total["rate"]],
                                             batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
                                           item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={user_batch: users,
                                                item_batch: items,
                                                rate_batch: rates})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
            if i == EPOCH_MAX * samples_per_batch - 1:
                # After the last batch, score the full data set and dump predictions.
                for users, items, rates in iter_totaltest:
                    pred_total = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items})
                    # print(users)
                    # print(items)
                    pred_total = clip(pred_total)
                    print(pred_total.shape)
                    print(pred_total)
                    # `with` closes the file; `file` also shadowed the builtin.
                    with open('result.txt', 'a') as result_file:
                        for j in pred_total:
                            result_file.write(str(j) + "\n")
df_train["item"], df_train["rate"]], batch_size=BATCH_SIZE) iter_test = dataio.OneEpochIterator([df_test["user"], df_test["item"], df_test["rate"]], batch_size=-1) user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user") item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item") rate_batch = tf.placeholder(tf.float32, shape=[None]) infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE) global_step = tf.contrib.framework.get_or_create_global_step() _, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=0.001, reg=0.05, device=DEVICE) def svd(train, test,length,moviefile, trainFl=False): init_op = tf.global_variables_initializer() saver=tf.train.Saver() with tf.Session() as sess: sess.run(init_op) if trainFl == True: summary_writer = tf.summary.FileWriter(logdir="./tmp/svd/log", graph=sess.graph) print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time")) errors = deque(maxlen=samples_per_batch)
def svd(train, test):
    samples_per_batch = len(train) // BATCH_SIZE
    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]],
                                        batch_size=BATCH_SIZE)
    iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["rate"]],
                                        batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    item_content_batch = tf.placeholder(tf.float32, shape=[None, ITEM_CONTENT_DIM],
                                        name="content_item")
    user_content_batch = tf.placeholder(tf.float32, shape=[None, USER_CONTENT_DIM],
                                        name="content_user")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = cdl.inference(user_batch, user_content_batch,
                                       item_batch, item_content_batch,
                                       user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM,
                                       item_autoencoder_input_dim=ITEM_CONTENT_DIM,
                                       item_autoencoder_hidden_dims=[50, DIM, 50],
                                       user_autoencoder_input_dim=USER_CONTENT_DIM,
                                       user_autoencoder_hidden_dims=[30, DIM, 30],
                                       device="/gpu:0")
    # infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
    #                                        item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            # Random stand-ins for the user/item content features.
            items_content = np.random.randn(BATCH_SIZE, ITEM_CONTENT_DIM).astype(np.float32)
            user_content = np.random.randn(BATCH_SIZE, USER_CONTENT_DIM).astype(np.float32)
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={user_batch: users,
                                                item_batch: items,
                                                rate_batch: rates,
                                                item_content_batch: items_content,
                                                user_content_batch: user_content})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    items_content = np.random.randn(len(users), ITEM_CONTENT_DIM).astype(np.float32)
                    user_content = np.random.randn(len(users), USER_CONTENT_DIM).astype(np.float32)
                    pred_batch = sess.run(infer,
                                          feed_dict={user_batch: users,
                                                     item_batch: items,
                                                     item_content_batch: items_content,
                                                     user_content_batch: user_content})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
def svd(train, test):
    # Number of batches per epoch over the training set.
    samples_per_batch = len(train) // BATCH_SIZE
    # Turn the column data into rows, then shuffle them randomly.
    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]],
                                        batch_size=BATCH_SIZE)
    print(iter_train)
    iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["rate"]],
                                        batch_size=-1)
    print(iter_test)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
                                           item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)
    # Initialize variables.
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        # Set up the log directory; it must match the path TensorBoard reads from.
        summary_writer = tf.summary.FileWriter(logdir="/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={user_batch: users,
                                                item_batch: items,
                                                rate_batch: rates})
            pred_batch = clip(pred_batch)
            # np.power: accumulate the squared errors.
            errors.append(np.power(pred_batch - rates, 2))
            # Report progress once per epoch.
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
def svd(train, test):
    nb_batches = len(train) // BATCH_SIZE
    iter_train = dataio.ShuffleIterator(
        [train["user"], train["item"], train["outcome"], train["wins"], train["fails"]],
        batch_size=BATCH_SIZE)
    iter_test = dataio.OneEpochIterator(
        [test["user"], test["item"], test["outcome"], test["wins"], test["fails"]],
        batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    wins_batch = tf.placeholder(tf.float32, shape=[None], name="nb_wins")
    fails_batch = tf.placeholder(tf.float32, shape=[None], name="nb_fails")
    # Ordinal variant (returns CDF/PDF logits and thresholds), kept for reference:
    # infer, logits, logits_cdf, logits_pdf, regularizer, user_bias, user_features, \
    #     item_bias, item_features, thresholds = ops.inference_svd(
    #         user_batch, item_batch, wins_batch, fails_batch,
    #         user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    infer, logits, regularizer, user_bias, user_features, item_bias, item_features = \
        ops.inference_svd(user_batch, item_batch, wins_batch, fails_batch,
                          user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    global_step = tf.train.get_or_create_global_step()
    # cost_l2, train_op = ops.optimization(infer, regularizer, rate_batch,
    #                                      learning_rate=LEARNING_RATE, reg=LAMBDA_REG,
    #                                      device=DEVICE)
    cost_nll, train_op = ops.optimization(infer, logits, regularizer, rate_batch,
                                          learning_rate=LEARNING_RATE, reg=LAMBDA_REG,
                                          device=DEVICE)
    # cost, train_op = ops.optimization(infer, logits, logits_cdf, logits_pdf,
    #                                   regularizer, rate_batch,
    #                                   learning_rate=LEARNING_RATE, reg=LAMBDA_REG,
    #                                   device=DEVICE)
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        train_se = deque(maxlen=nb_batches)
        train_nll = deque(maxlen=nb_batches)
        train_cost = deque(maxlen=nb_batches)
        train_acc = deque(maxlen=nb_batches)
        train_obo = deque(maxlen=nb_batches)
        train_auc = deque(maxlen=nb_batches)
        start = time.time()
        for i in range(EPOCH_MAX * nb_batches):
            train_users, train_items, train_rates, train_wins, train_fails = next(iter_train)
            batch_size = len(train_rates)
            _, train_logits, train_infer = sess.run(
                [train_op, logits, infer],
                feed_dict={user_batch: train_users,
                           item_batch: train_items,
                           rate_batch: train_rates,
                           wins_batch: train_wins,
                           fails_batch: train_fails})
            if DISCRETE:
                if NB_CLASSES > 2:
                    # NOTE: this branch relies on `cost`, `logits_cdf` and
                    # `train_logits_cdf` from the commented-out ordinal variant
                    # above; it cannot run with the binary setup as wired here.
                    cost_batch = sess.run(cost,
                                          feed_dict={rate_batch: train_rates,
                                                     item_batch: train_items,
                                                     user_batch: train_users,
                                                     logits_cdf: train_logits_cdf})
                    train_cost.append(cost_batch)
                    train_acc.append(train_infer == train_rates)
                    train_obo.append(abs(train_infer - train_rates) <= 1)
                    train_se.append(np.power(train_infer - train_rates, 2))
                else:
                    nll_batch = sess.run(cost_nll,
                                         feed_dict={rate_batch: train_rates,
                                                    logits: train_logits})
                    proba_batch = ops.sigmoid(train_logits)
                    train_acc.append(np.round(proba_batch) == train_rates)
                    train_auc.append(roc_auc_score(train_rates, proba_batch))
                    train_nll.append(nll_batch)
            else:
                # NOTE: `cost_l2` comes from the commented-out L2 variant above.
                l2_batch = sess.run(cost_l2,
                                    feed_dict={rate_batch: train_rates,
                                               infer: train_infer})
                train_se.append(np.power(train_rates - train_infer, 2))
            if i % nb_batches == 0:
                # Compute test error
                train_rmse = np.sqrt(np.mean(train_se))
                train_macc = np.mean(train_acc)
                train_mobo = np.mean(train_obo)
                train_mauc = np.mean(train_auc)
                train_mnll = np.mean(train_nll) / BATCH_SIZE
                train_mcost = np.mean(train_cost)
                test_se = []
                test_acc = []
                test_obo = []
                test_auc = 0
                test_nll = []
                test_cost = []
                for test_users, test_items, test_rates, test_wins, test_fails in iter_test:
                    test_logits, test_infer = sess.run(
                        [logits, infer],
                        feed_dict={user_batch: test_users,
                                   item_batch: test_items,
                                   wins_batch: test_wins,
                                   fails_batch: test_fails})
                    test_size = len(test_rates)
                    if DISCRETE:
                        if NB_CLASSES > 2:
                            # Same caveat as the training branch: needs the
                            # ordinal `cost` from the commented-out variant.
                            cost_batch = sess.run(cost,
                                                  feed_dict={rate_batch: test_rates,
                                                             item_batch: test_items,
                                                             user_batch: test_users})
                            test_cost.append(cost_batch)
                            test_acc.append(test_infer == test_rates)
                            test_obo.append(abs(test_infer - test_rates) <= 1)
                            test_se.append(np.power(test_infer - test_rates, 2))
                        else:
                            nll_batch = sess.run(cost_nll,
                                                 feed_dict={rate_batch: test_rates,
                                                            logits: test_logits})
                            proba_batch = ops.sigmoid(test_logits)
                            test_acc.append(np.round(proba_batch) == test_rates)
                            test_auc = roc_auc_score(test_rates, proba_batch)
                            test_nll.append(nll_batch)
                    else:
                        l2_batch = sess.run(cost_l2,
                                            feed_dict={rate_batch: test_rates,  # was `rates`, undefined
                                                       infer: test_infer})      # was `pred_batch`, undefined
                        test_se.append(np.power(test_rates - test_infer, 2))
                end = time.time()
                test_rmse = np.sqrt(np.mean(test_se))
                test_macc = np.mean(test_acc)
                test_mobo = np.mean(test_obo)
                test_mnll = np.mean(test_nll) / len(test)
                test_mcost = np.mean(test_cost)
                if DISCRETE:
                    if NB_CLASSES > 2:
                        print("{:3d} TRAIN(size={:d}/{:d}, macc={:f}, mobo={:f}, rmse={:f}, mcost={:f}) "
                              "TEST(size={:d}, macc={:f}, mobo={:f}, rmse={:f}, mcost={:f}) {:f}(s)"
                              .format(i // nb_batches, len(train_users), len(train),
                                      train_macc, train_mobo, train_rmse, train_mcost,
                                      len(test), test_macc, test_mobo, test_rmse,
                                      test_mcost, end - start))
                    else:
                        print("{:3d} TRAIN(size={:d}/{:d}, macc={:f}, mauc={:f}, mnll={:f}) "
                              "TEST(size={:d}, macc={:f}, auc={:f}, mnll={:f}) {:f}(s)"
                              .format(i // nb_batches, len(train_users), len(train),
                                      train_macc, train_mauc, train_mnll,
                                      len(test), test_macc, test_auc, test_mnll,
                                      end - start))
                else:
                    print("{:3d} TRAIN(size={:d}/{:d}, rmse={:f}) "
                          "TEST(size={:d}, rmse={:f}) {:f}(s)"
                          .format(i // nb_batches, len(train_users), len(train),
                                  train_rmse, len(test), test_rmse, end - start))
                train_err_summary = make_scalar_summary("training_error", train_rmse)
                test_err_summary = make_scalar_summary("test_error", test_rmse)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
        # Save model
        print(os.path.join(BASE_DIR, 'fm.ckpt'))
        saver.save(sess, os.path.join(BASE_DIR, 'fm.ckpt'))
def svd_with_pipe(samples_per_batch):
    trainfilequeue = tf.train.string_input_producer(
        ["/tmp/movielens/ml-1m/ratings.dat"], num_epochs=None, shuffle=False)
    testfilequeue = tf.train.string_input_producer(
        ["/tmp/movielens/ml-1m/ratings.dat"], num_epochs=None, shuffle=False)
    reader = tf.TextLineReader()
    user_batch, item_batch, rate_batch = shuffleInputPipeline(
        trainfilequeue, reader, BATCH_SIZE, 10)
    testuser_batch, testitem_batch, testrate_batch = shuffleInputPipeline(
        testfilequeue, reader, BATCH_SIZE, 10)
    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
                                           item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)
    init_op = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # Grab one fixed batch up front to serve as the validation set.
    testusers, testitems, testrates = sess.run(
        [testuser_batch, testitem_batch, testrate_batch])
    errors = deque(maxlen=samples_per_batch)
    print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
    try:
        for i in range(EPOCH_MAX * samples_per_batch):
            start = time.time()
            users, items, rates = sess.run([user_batch, item_batch, rate_batch])
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={user_batch: users,
                                                item_batch: items,
                                                rate_batch: rates})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                pred_batch = sess.run(infer, feed_dict={user_batch: testusers,
                                                        item_batch: testitems})
                pred_batch = clip(pred_batch)
                test_err2 = np.append(test_err2, np.power(pred_batch - testrates, 2))
                end = time.time()
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err,
                                                       np.sqrt(np.mean(test_err2)),
                                                       end - start))
                start = end
    except tf.errors.OutOfRangeError:
        print('Done Training')
    finally:
        coord.request_stop()
        coord.join(threads)
    sess.close()
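# A minimal sketch of driving the pipeline variant above. Because the
# queue-based reader never loads the ratings file into memory, the number of
# training examples has to be supplied by hand; 1000209 is the MovieLens-1M
# line count and is an assumption here.
if __name__ == "__main__":
    NUM_RATINGS = 1000209
    svd_with_pipe(samples_per_batch=NUM_RATINGS // BATCH_SIZE)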
def timesvdplusplus(train, test, binsize, ut_mean, maxtime):
    # NOTE: here `samples_per_batch` is used purely as the reporting interval,
    # not as the number of batches per epoch.
    samples_per_batch = BATCH_SIZE
    iter_train = dataio.ShuffleIterator(
        [train["user"], train["item"], train["rate"], train["st"]],
        batch_size=BATCH_SIZE)
    iter_test = dataio.ShuffleIterator(
        [test["user"], test["item"], test["rate"], test["st"]],
        batch_size=BATCH_SIZE)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    time_batch = tf.placeholder(tf.int32, shape=[None])
    rmat_batch = tf.placeholder(tf.float32, shape=[USER_NUM, ITEM_NUM], name="rmat")
    tu_batch = tf.placeholder(tf.int32, shape=[None])
    # infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
    #                                        item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    infer, regularizer = ops.inference_timesvdplusplus(
        user_batch, item_batch, time_batch, rmat_batch, tu_batch, binsize, maxtime,
        user_num=USER_NUM, item_num=ITEM_NUM, batch_size=BATCH_SIZE, dim=DIM)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates, times = next(iter_train)
            # Binary implicit-feedback matrix for the users in this batch.
            rmat = np.zeros([USER_NUM, ITEM_NUM], dtype=np.float32)
            rmat[users, items] = 1.0
            _, pred_batch = sess.run(
                [train_op, infer],
                feed_dict={user_batch: users,
                           item_batch: items,
                           rate_batch: rates,
                           time_batch: times,
                           rmat_batch: rmat,
                           tu_batch: ut_mean[users]})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                users, items, rates, times = next(iter_test)
                rmat = np.zeros([USER_NUM, ITEM_NUM], dtype=np.float32)
                rmat[users, items] = 1.0
                pred_batch = sess.run(infer,
                                      feed_dict={user_batch: users,
                                                 item_batch: items,
                                                 time_batch: times,
                                                 rmat_batch: rmat,
                                                 tu_batch: ut_mean[users]})
                pred_batch = clip(pred_batch)
                test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
def svdplusplus(train, test):
    # As in timesvdplusplus above, `samples_per_batch` is just the reporting interval.
    samples_per_batch = BATCH_SIZE
    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]],
                                        batch_size=BATCH_SIZE)
    iter_test = dataio.ShuffleIterator([test["user"], test["item"], test["rate"]],
                                       batch_size=BATCH_SIZE)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    rmat_batch = tf.placeholder(tf.float32, shape=[USER_NUM, ITEM_NUM], name="rmat")
    # Defined but never fed below; kept as in the original.
    onecount_sqrt_batch = tf.placeholder(tf.float32, shape=[USER_NUM],
                                         name="onecount_sqrt")
    # infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
    #                                        item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    infer, regularizer = ops.inference_svdplusplus(user_batch, item_batch, rmat_batch,
                                                   user_num=USER_NUM, item_num=ITEM_NUM,
                                                   batch_size=BATCH_SIZE, dim=DIM)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            # Convert the pandas DataFrame to a dense rating matrix. This is
            # loop-invariant, so it could be hoisted out of the loop (see the
            # sketch after this function).
            rvalues = train.values
            rmat = np.zeros((USER_NUM, ITEM_NUM), dtype=float)
            rows, row_pos = np.unique(rvalues[:, 1], return_inverse=True)
            cols, col_pos = np.unique(rvalues[:, 2], return_inverse=True)
            rmat[row_pos, col_pos] = rvalues[:, 3]
            # rmat = rmat[users[:, None], items]
            _, pred_batch = sess.run(
                [train_op, infer],
                feed_dict={user_batch: users,
                           item_batch: items,
                           rate_batch: rates,
                           rmat_batch: rmat})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                users, items, rates = next(iter_test)
                pred_batch = sess.run(infer,
                                      feed_dict={user_batch: users,
                                                 item_batch: items,
                                                 rmat_batch: rmat})
                pred_batch = clip(pred_batch)
                test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
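# Sketch referenced from svdplusplus() above: the dense rating matrix is
# identical on every iteration, so it can be built once before training and
# the same array fed to every sess.run call (same DataFrame column layout
# assumed: user in column 1, item in column 2, rate in column 3).
def build_dense_rmat(train):
    rvalues = train.values
    rmat = np.zeros((USER_NUM, ITEM_NUM), dtype=float)
    rows, row_pos = np.unique(rvalues[:, 1], return_inverse=True)
    cols, col_pos = np.unique(rvalues[:, 2], return_inverse=True)
    rmat[row_pos, col_pos] = rvalues[:, 3]
    return rmat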