def svd(train, test, learning_rate=0.0005, reg=0.02, dim=50, batch_size=1000):
    """Train an SVD matrix-factorization model and print per-epoch RMSE.

    Args:
        train: columnar data with "user", "item", "rate" fields (training set).
        test: same layout; evaluated once per epoch.
        learning_rate: SGD learning rate passed to ops.optimization.
        reg: regularization strength.
        dim: latent-factor dimensionality.
        batch_size: training mini-batch size.

    NOTE(review): relies on module-level USER_NUM, ITEM_NUM, DEVICE, EPOCH_MAX,
    dataio, ops, clip and make_scalar_summary — confirm against the full file.
    """
    samples_per_batch = len(train) // batch_size  # mini-batches per epoch
    # Endless shuffled iterator over the training triples.
    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]], batch_size=batch_size)
    # batch_size=-1: the whole test set comes back as a single batch.
    iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["rate"]], batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=dim, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=learning_rate, reg=reg, device=DEVICE)
    pid = int(os.getpid())  # printed so parallel runs can be told apart
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        # Rolling window of squared errors over the last epoch's batches.
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        min_test_err = 9999  # sentinel: any real RMSE will be smaller
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users, item_batch: items, rate_batch: rates})
            pred_batch = clip(pred_batch)  # clamp predictions to the valid rating range
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:  # epoch boundary: evaluate on the test set
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users, item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                min_test_err = min(test_err, min_test_err)  # best test RMSE so far
                print("{:5d} {:3d} {:f} {:f} {:f} {:f}(s)".format(pid, i // samples_per_batch, train_err, test_err, min_test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
                sys.stdout.flush()
def svd(train, test):
    """Train an SVD matrix-factorization model and print per-epoch RMSE.

    Uses module-level BATCH_SIZE, DIM, EPOCH_MAX, USER_NUM, ITEM_NUM, DEVICE,
    plus the dataio/ops helpers, clip and make_scalar_summary.
    """
    samples_per_batch = len(train) // BATCH_SIZE  # mini-batches per epoch
    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]], batch_size=BATCH_SIZE)
    # batch_size=-1: the whole test set comes back as one batch.
    iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["rate"]], batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=0.001, reg=0.05, device=DEVICE)
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)  # rolling window of squared errors
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users, item_batch: items, rate_batch: rates})
            pred_batch = clip(pred_batch)  # clamp to the valid rating range
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:  # epoch boundary: evaluate on the test set
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users, item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch, train_err, test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
def svd(train, test):
    """Train an SVD model and print per-epoch train/validation RMSE.

    Written against a legacy pre-1.0 TensorFlow API
    (initialize_all_variables, tf.train.SummaryWriter, tf.python.client paths).
    """
    samples_per_batch = len(train) // BATCH_SIZE  # mini-batches per epoch
    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]], batch_size=BATCH_SIZE)
    iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["rate"]], batch_size=-1)  # one whole-set batch
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    # NOTE(review): "optimiaztion" is misspelled but presumably matches the
    # function's actual name in the ops module — confirm before renaming.
    _, train_op = ops.optimiaztion(infer, regularizer, rate_batch, learning_rate=0.15, reg=0.05, device=DEVICE)
    init_op = tf.initialize_all_variables()  # legacy pre-1.0 initializer
    with tf.Session() as sess:
        sess.run(init_op)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)  # squared errors over the last epoch
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users, item_batch: items, rate_batch: rates})
            pred_batch = clip(pred_batch)  # clamp to the valid rating range
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:  # epoch boundary: evaluate
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users, item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch, train_err, np.sqrt(np.mean(test_err2)), end - start))
                start = end
        # Export only the inference/regularizer subgraph for visualization.
        output_graph_def = tf.python.client.graph_util.extract_sub_graph(sess.graph.as_graph_def(), ["svd_inference", "svd_regularizer"])
        tf.train.SummaryWriter(logdir="/tmp/svd", graph_def=output_graph_def)
def svd(train, test):
    """Train an SVD model predicting basket size from (user, days-since-prior-order).

    Tracks the evaluation-pass predictions/actuals from the epoch with the
    lowest training RMSE and returns them.

    Returns:
        (finalpr, finalac): predicted and actual basket sizes captured at the
        best epoch (empty lists if no epoch boundary was reached).

    Fixes vs. previous revision: the Python-2 `print test.head(10)` statement
    (a SyntaxError under Python 3) is now a call; the local `min` no longer
    shadows the builtin; four never-read accumulator lists
    (predList/actList/finalPred/finalAct) were removed.
    """
    samples_per_batch = len(train) // BATCH_SIZE  # mini-batches per epoch
    print(test.head(10))
    iter_train = dataio.ShuffleIterator(
        [train["user"], train["days_since_prior_order"], train["basket_size"]],
        batch_size=BATCH_SIZE)
    # batch_size=-1: the whole test set comes back as one batch.
    iter_test = dataio.OneEpochIterator(
        [test["user"], test["days_since_prior_order"], test["basket_size"]],
        batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    days_since_prior_order_batch = tf.placeholder(
        tf.int32, shape=[None], name="id_days_since_prior_order")
    basket_size_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, days_since_prior_order_batch,
                                           user_num=USER_NUM, item_num=ITEM_NUM,
                                           dim=DIM, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, basket_size_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        min_train_err = 100  # best training RMSE so far (was `min`)
        finalpr = []
        finalac = []
        for i in range(EPOCH_MAX * samples_per_batch):
            users, days_since_prior_orders, basket_sizes = next(iter_train)
            _, pred_batch = sess.run(
                [train_op, infer],
                feed_dict={
                    user_batch: users,
                    days_since_prior_order_batch: days_since_prior_orders,
                    basket_size_batch: basket_sizes
                })
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - basket_sizes, 2))
            if i % samples_per_batch == 0:  # epoch boundary: evaluate
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, days_since_prior_orders, basket_sizes in iter_test:
                    pred_batch = sess.run(infer,
                                          feed_dict={
                                              user_batch: users,
                                              days_since_prior_order_batch: days_since_prior_orders
                                          })
                    #pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2,
                                          np.power(pred_batch - basket_sizes, 2))
                # With batch_size=-1 the single eval batch covers the test set.
                pr = pred_batch
                ac = basket_sizes
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err,
                                                       end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
                if train_err < min_train_err:
                    min_train_err = train_err
                    finalpr = pr
                    finalac = ac
        return finalpr, finalac
LEARNING_RATE = 5 * 1e-3 # LEARNING_RATE = 0.1 EPOCH_MAX = 100 LAMBDA_REG = 0.1 LOG_STEP = 101 user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user") item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item") rate_batch = tf.placeholder(tf.float32, shape=[None]) wins_batch = tf.placeholder(tf.float32, shape=[None], name="nb_wins") fails_batch = tf.placeholder(tf.float32, shape=[None], name="nb_fails") infer, logits, logits_cdf, logits_pdf, regularizer, user_bias, user_features, item_bias, item_features, thresholds = ops.inference_svd(user_batch, item_batch, wins_batch, fails_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE) global_step = tf.train.get_or_create_global_step() # Attention: only var_list = embd_user, bias_user cost, auc, update_op, train_op = ops.optimization(infer, logits, logits_cdf, logits_pdf, regularizer, rate_batch, learning_rate=LEARNING_RATE, reg=LAMBDA_REG, device=DEVICE, var_list=[user_bias, user_features]) df_train, _, df_test = dataio.get_data() saver = tf.train.Saver() with tf.Session() as sess: saver.restore(sess, os.path.join(BASE_DIR, "fm.ckpt")) all_user_features = sess.run(user_features, feed_dict={user_batch: range(USER_NUM)}) all_user_features_norms = np.diag(all_user_features.dot(all_user_features.T)) all_user_bias = sess.run(user_bias, feed_dict={user_batch: range(USER_NUM)}) # print('all_features', all_user_features.min(), 'to', all_user_features.max())
def svd(train, test, length, moviefile, trainFl=False):
    """Train (trainFl=True) or restore an SVD recommender, then print top-10
    movie recommendations for demo users.

    Args:
        train, test: columnar data with "user", "item", "rate" fields.
        length: unused here; kept so existing callers keep working.
        moviefile: table with "title" and "tags" columns indexed by movie id.
        trainFl: True to train from scratch and checkpoint to ./tmp/;
            False to restore the ./tmp/ checkpoint.

    Fix: removed the unreachable socket-serving loop that followed the final
    `return` — it used Python-2 `print >>` statements, which are a SyntaxError
    under Python 3, and referenced a `sock` whose setup was commented out.
    """
    print("Movies file length:")
    print(len(moviefile))
    samples_per_batch = len(train) // BATCH_SIZE  # mini-batches per epoch
    iter_train = dataio.ShuffleIterator(
        [train["user"], train["item"], train["rate"]], batch_size=BATCH_SIZE)
    iter_test = dataio.OneEpochIterator(
        [test["user"], test["item"], test["rate"]], batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
                                           item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)
    init_op = tf.global_variables_initializer()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="./tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        if trainFl:  # simplified from `trainFl == True`
            for i in range(EPOCH_MAX * samples_per_batch):
                users, items, rates = next(iter_train)
                _, pred_batch = sess.run([train_op, infer],
                                         feed_dict={
                                             user_batch: users,
                                             item_batch: items,
                                             rate_batch: rates
                                         })
                pred_batch = clip(pred_batch)
                errors.append(np.power(pred_batch - rates, 2))
                if i % samples_per_batch == 0:  # epoch boundary: evaluate
                    train_err = np.sqrt(np.mean(errors))
                    test_err2 = np.array([])
                    for users, items, rates in iter_test:
                        pred_batch = sess.run(infer,
                                              feed_dict={
                                                  user_batch: users,
                                                  item_batch: items
                                              })
                        pred_batch = clip(pred_batch)
                        test_err2 = np.append(test_err2,
                                              np.power(pred_batch - rates, 2))
                    end = time.time()
                    test_err = np.sqrt(np.mean(test_err2))
                    print("{:3d} {:f} {:f} {:f}(s)".format(
                        i // samples_per_batch, train_err, test_err, end - start))
                    train_err_summary = make_scalar_summary(
                        "training_error", train_err)
                    test_err_summary = make_scalar_summary(
                        "test_error", test_err)
                    summary_writer.add_summary(train_err_summary, i)
                    summary_writer.add_summary(test_err_summary, i)
                    start = end
            save_path = saver.save(sess, "./tmp/")
        else:
            saver.restore(sess, "./tmp/")
        # Score every movie for user 1 and show the ten highest predictions.
        movies = list(range(len(moviefile)))
        users = [1]
        pred_batch = sess.run(infer, feed_dict={
            user_batch: users,
            item_batch: movies
        })
        moviesrecomm = list(zip(movies, pred_batch))
        smovies = sorted(moviesrecomm, key=lambda x: x[1], reverse=True)
        print(
            " Top Movies ------------------------------------------------------------"
        )
        topmovies = smovies[0:10]
        print(topmovies)
        # Repeat for a hard-coded demo user id (give number between 1 - 5000).
        data = 3
        del users[:]
        users.append(int(data))
        print(users)
        pred_batch = sess.run(infer, feed_dict={
            user_batch: users,
            item_batch: movies
        })
        moviesrecomm = list(zip(movies, pred_batch))
        smovies = sorted(moviesrecomm, key=lambda x: x[1], reverse=True)
        topmovies = smovies[0:10]
        print(topmovies)
        # Leftover of the removed socket server: looks up titles/tags for the
        # top movies but no longer sends them anywhere.
        for item in topmovies:
            itopmovie = item[0]
            recommendedmovie = moviefile["title"][itopmovie]
            recommendedtags = moviefile["tags"][itopmovie]
        return
def svd(train, test):
    """Train an SVD model and print per-epoch train/test RMSE.

    Bug fixes vs. the previous revision:
      * `pre_batch`/`pred_barch` typos unified to `pred_batch` — the old code
        clipped an undefined name (NameError on the first iteration).
      * the squared error used exponent 200 instead of 2.
      * the test feed referenced undefined `user` instead of `users`.
      * rate_batch is tf.float32 (was tf.int32) so the float loss math
        type-checks against the float predictions.
    """
    samples_per_batch = len(train) // batch_size  # mini-batches per epoch
    iter_train = data.ShuffleIterator(
        [train["user"], train["item"], train["rate"]], batch_size=batch_size)
    # batch_size=-1: the whole test set comes back as one batch.
    iter_test = data.OneEpochIterator(
        [test["user"], test["item"], test["rate"]], batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, item_batch,
                                           user_num=user_num, item_num=item_num,
                                           dim=dim, device=device)
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=device)
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(epoch_max * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={
                                         user_batch: users,
                                         item_batch: items,
                                         rate_batch: rates
                                     })
            pred_batch = clip(pred_batch)  # clamp to the valid rating range
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:  # epoch boundary: evaluate
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer,
                                          feed_dict={
                                              user_batch: users,
                                              item_batch: items
                                          })
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2,
                                          np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err,
                                                       end - start))
                train_err_summary = make_scalar_summary(
                    "training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
def svd(train, test, total):
    """Train an SVD model, print per-epoch RMSE, and after the final step
    append the clipped predictions for `total` to result.txt, one per line.

    Fix: the output file was opened into a variable named `file` (shadowing
    the builtin) and never closed; it is now written inside a `with` block.
    """
    samples_per_batch = len(train) // BATCH_SIZE  # mini-batches per epoch
    iter_train = dataio.ShuffleIterator(
        [train["user"], train["item"], train["rate"]], batch_size=BATCH_SIZE)
    iter_test = dataio.OneEpochIterator(
        [test["user"], test["item"], test["rate"]], batch_size=-1)
    # Full data set scored once at the very end of training.
    iter_totaltest = dataio.OneEpochIterator(
        [total["user"], total["item"], total["rate"]], batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
                                           item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=0.001, reg=0.05, device=DEVICE)
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={
                                         user_batch: users,
                                         item_batch: items,
                                         rate_batch: rates
                                     })
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:  # epoch boundary: evaluate
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer,
                                          feed_dict={
                                              user_batch: users,
                                              item_batch: items
                                          })
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2,
                                          np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err,
                                                       end - start))
                train_err_summary = make_scalar_summary(
                    "training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
            if i == EPOCH_MAX * samples_per_batch - 1:  # final step: dump predictions
                for users, items, rates in iter_totaltest:
                    pred_total = sess.run(infer,
                                          feed_dict={
                                              user_batch: users,
                                              item_batch: items
                                          })
                    pred_total = clip(pred_total)
                    print(pred_total.shape)
                    print(pred_total)
                    with open('result.txt', 'a') as result_file:
                        for j in pred_total:
                            result_file.write(str(j) + "\n")
# Graph construction at module scope: iterators, placeholders, and the
# training op are built once and captured by svd() below.
iter_train = dataio.ShuffleIterator([df_train["user"], df_train["item"], df_train["rate"]], batch_size=BATCH_SIZE)
iter_test = dataio.OneEpochIterator([df_test["user"], df_test["item"], df_test["rate"]], batch_size=-1)
user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
rate_batch = tf.placeholder(tf.float32, shape=[None])
infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
global_step = tf.contrib.framework.get_or_create_global_step()
_, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=0.001, reg=0.05, device=DEVICE)


def svd(train, test, length, moviefile, trainFl=False):
    """Open a session over the module-level graph; when trainFl is True, also
    create the TensorBoard writer.

    NOTE(review): the body appears truncated here — the actual train/restore
    logic presumably continues elsewhere in the original file; confirm.
    """
    init_op = tf.global_variables_initializer()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init_op)
        if trainFl == True:
            summary_writer = tf.summary.FileWriter(logdir="./tmp/svd/log", graph=sess.graph)
def svd(train, test):
    """Train an SVD model and print per-epoch train/validation RMSE.

    Written against a legacy pre-1.0 TensorFlow API
    (initialize_all_variables, tf.train.SummaryWriter, tf.python paths).
    """
    samples_per_batch = len(train) // BATCH_SIZE  # mini-batches per epoch
    iter_train = dataio.ShuffleIterator(
        [train["user"], train["item"], train["rate"]], batch_size=BATCH_SIZE)
    # batch_size=-1: the whole test set comes back as one batch.
    iter_test = dataio.OneEpochIterator(
        [test["user"], test["item"], test["rate"]], batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM,
                                           item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    # NOTE(review): "optimiaztion" is misspelled but presumably matches the
    # function's actual name in the ops module — confirm before renaming.
    _, train_op = ops.optimiaztion(infer, regularizer, rate_batch,
                                   learning_rate=0.15, reg=0.05, device=DEVICE)
    init_op = tf.initialize_all_variables()  # legacy pre-1.0 initializer
    with tf.Session() as sess:
        sess.run(init_op)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)  # squared errors over the last epoch
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={
                                         user_batch: users,
                                         item_batch: items,
                                         rate_batch: rates
                                     })
            pred_batch = clip(pred_batch)  # clamp to the valid rating range
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:  # epoch boundary: evaluate
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer,
                                          feed_dict={
                                              user_batch: users,
                                              item_batch: items
                                          })
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2,
                                          np.power(pred_batch - rates, 2))
                end = time.time()
                print("{:3d} {:f} {:f} {:f}(s)".format(
                    i // samples_per_batch, train_err,
                    np.sqrt(np.mean(test_err2)), end - start))
                start = end
        # Export only the inference/regularizer subgraph for visualization.
        output_graph_def = tf.python.framework.graph_util.extract_sub_graph(
            sess.graph.as_graph_def(), ["svd_inference", "svd_regularizer"])
        tf.train.SummaryWriter(logdir="/tmp/svd", graph_def=output_graph_def)
def svd(X_train, X_test, feedback_u, DIM, LAMBDA):
    """Main SVD code: train an SVD model with implicit feedback and early
    stopping on validation RMSE.

    Args:
        X_train, X_test: columnar data with "user", "item", "rate" fields.
        feedback_u: (USER_NUM, ITEM_NUM) implicit-feedback matrix, row-indexed
            by user id.
        DIM: latent-factor dimensionality.
        LAMBDA: regularization strength.

    Returns:
        (min_err, ratings): best validation RMSE and the clipped full
        user-item prediction matrix.

    Cleanup vs. previous revision: removed the unused local `learning = LR`
    and a dead commented-out iterator; the stale "50 steps" comment now
    matches the actual patience of 100 epochs.
    """
    # number of batches in train data
    samples_per_batch = len(X_train) // BATCH_SIZE
    # early-stopping state
    min_err = 100  # best validation RMSE seen so far (sentinel start)
    counter = 0  # epochs since the last improvement
    # iterator objects for train and validation sets
    iter_train = dataio.ShuffleIterator(
        [X_train["user"], X_train["item"], X_train["rate"]],
        batch_size=BATCH_SIZE)
    iter_val = dataio.OneEpochIterator(
        [X_test["user"], X_test["item"], X_test["rate"]],
        batch_size=BATCH_SIZE)
    # Fresh graph so svd() can be called repeatedly (e.g. k-fold validation).
    with tf.Graph().as_default():
        user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
        item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
        rate_batch = tf.placeholder(tf.float32, shape=[None])
        feedback_batch = tf.placeholder(tf.float32, shape=[None, ITEM_NUM])
        feedback_mat = tf.placeholder(tf.float32, shape=[USER_NUM, ITEM_NUM])
        infer, regularizer = ops.inference_svd(user_batch, item_batch, feedback_batch,
                                               user_num=USER_NUM, item_num=ITEM_NUM,
                                               dim=DIM, device=DEVICE)
        # NOTE(review): "optimiaztion" matches the (misspelled) name in ops.
        _, train_op = ops.optimiaztion(infer, regularizer, rate_batch,
                                       learning_rate=LR, reg=LAMBDA, device=DEVICE)
        full_ratings = ops.get_pred(feedback_mat, ITEM_NUM, USER_NUM, DIM, DEVICE)
        init_op = tf.initialize_all_variables()
        with tf.Session() as sess:
            sess.run(init_op)
            print("{} {} {} {}".format("epoch", "train_error", "val_error",
                                       "elapsed_time"))
            errors = deque(maxlen=samples_per_batch)
            start = time.time()
            for i in range(EPOCH_MAX * samples_per_batch):
                # Generate batch data plus each user's feedback row.
                users, items, rates = next(iter_train)
                feedback = feedback_u[users.astype('int'), :]
                _, pred_batch = sess.run(
                    [train_op, infer],
                    feed_dict={
                        user_batch: users,
                        item_batch: items,
                        rate_batch: rates,
                        feedback_batch: feedback
                    })
                pred_batch = clip(pred_batch)
                errors.append(np.power(pred_batch - rates, 2))
                if i % samples_per_batch == 0:  # end of epoch: validate
                    train_err = np.sqrt(np.mean(errors))  # train rmse
                    test_err2 = np.array([])
                    for users, items, rates in iter_val:
                        feedback = feedback_u[users.astype('int'), :]
                        pred_batch = sess.run(infer,
                                              feed_dict={
                                                  user_batch: users,
                                                  item_batch: items,
                                                  feedback_batch: feedback
                                              })
                        pred_batch = clip(pred_batch)
                        test_err2 = np.append(test_err2,
                                              np.power(pred_batch - rates, 2))
                    end = time.time()
                    RMSE_val = np.sqrt(np.mean(test_err2))
                    print("{:3d} {:f} {:f} {:f}(s)".format(
                        i // samples_per_batch, train_err, RMSE_val,
                        end - start))
                    start = end  # reset clock
                    # Early stopping: stop after 100 epochs without improvement.
                    if min_err > RMSE_val:
                        min_err = RMSE_val
                        counter = 0
                        print('Min error updated')
                    else:
                        counter += 1
                        if counter >= 100:
                            break
            # Output log information
            output_graph_def = graph_util.extract_sub_graph(
                sess.graph.as_graph_def(),
                ["svd_inference", "svd_regularizer"])
            tf.train.SummaryWriter(logdir="/tmp/svd", graph_def=output_graph_def)
            ratings_mat = sess.run(full_ratings,
                                   feed_dict={feedback_mat: feedback_u})
            return min_err, clip(ratings_mat)
def svd(train, test):
    """Train an SVD model; log train/validation RMSE to stdout and TensorBoard."""
    # Number of full mini-batches in the training set.
    samples_per_batch = len(train) // BATCH_SIZE
    # Turn the column data into rows, then shuffle them randomly.
    iter_train = dataio.ShuffleIterator([train["user"], train["item"], train["rate"]], batch_size=BATCH_SIZE)
    print(iter_train)
    iter_test = dataio.OneEpochIterator([test["user"], test["item"], test["rate"]], batch_size=-1)
    print(iter_test)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=0.001, reg=0.05, device=DEVICE)
    # Initialize the variables.
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        # Log output directory — must match the address TensorBoard is pointed at.
        summary_writer = tf.summary.FileWriter(logdir="/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users, item_batch: items, rate_batch: rates})
            pred_batch = clip(pred_batch)
            # np.power: elementwise exponent — accumulate squared errors.
            errors.append(np.power(pred_batch - rates, 2))
            # Report stats once per epoch.
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users, item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch, train_err, test_err, end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
def svd(train, test):
    """Train an SVD-style model on (user, item, outcome, wins, fails) data.

    Supports three reporting modes driven by module-level globals:
    DISCRETE with NB_CLASSES > 2 (ordinal), DISCRETE binary (logistic), and
    continuous (L2).  NOTE(review): only the DISCRETE binary path is fully
    wired up — see the NameError notes below before enabling the others.

    Relies on module-level BATCH_SIZE, DIM, EPOCH_MAX, USER_NUM, ITEM_NUM,
    DEVICE, DISCRETE, NB_CLASSES, LEARNING_RATE, LAMBDA_REG, BASE_DIR and
    the helpers `dataio`, `ops`, `roc_auc_score`, `make_scalar_summary`.
    """
    # Number of mini-batches per epoch.
    nb_batches = len(train) // BATCH_SIZE
    # Shuffled, endlessly cycling training iterator over five columns.
    iter_train = dataio.ShuffleIterator([
        train["user"], train["item"], train["outcome"], train["wins"],
        train["fails"]
    ], batch_size=BATCH_SIZE)
    # One full pass over the test set per evaluation (batch_size=-1: all).
    iter_test = dataio.OneEpochIterator([
        test["user"], test["item"], test["outcome"], test["wins"],
        test["fails"]
    ], batch_size=-1)
    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    # Despite the name, this holds the "outcome" column values.
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    wins_batch = tf.placeholder(tf.float32, shape=[None], name="nb_wins")
    fails_batch = tf.placeholder(tf.float32, shape=[None], name="nb_fails")
    # infer, logits, logits_cdf, logits_pdf, regularizer, user_bias, user_features, item_bias, item_features, thresholds = ops.inference_svd(user_batch, item_batch, wins_batch, fails_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    infer, logits, regularizer, user_bias, user_features, item_bias, item_features = ops.inference_svd(
        user_batch, item_batch, wins_batch, fails_batch, user_num=USER_NUM,
        item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    global_step = tf.train.get_or_create_global_step()
    #cost_l2, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=LEARNING_RATE, reg=LAMBDA_REG, device=DEVICE)
    # Active objective: negative log-likelihood on the logits.
    cost_nll, train_op = ops.optimization(infer, logits, regularizer,
                                          rate_batch,
                                          learning_rate=LEARNING_RATE,
                                          reg=LAMBDA_REG, device=DEVICE)
    #cost, train_op = ops.optimization(infer, logits, logits_cdf, logits_pdf, regularizer, rate_batch, learning_rate=LEARNING_RATE, reg=LAMBDA_REG, device=DEVICE)
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log",
                                               graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error",
                                   "elapsed_time"))
        # Rolling windows over the last epoch's batches, one per metric.
        train_se = deque(maxlen=nb_batches)    # squared errors
        train_nll = deque(maxlen=nb_batches)   # negative log-likelihoods
        train_cost = deque(maxlen=nb_batches)  # ordinal-cost values
        train_acc = deque(maxlen=nb_batches)   # exact-match accuracy
        train_obo = deque(maxlen=nb_batches)   # off-by-one accuracy
        train_auc = deque(maxlen=nb_batches)   # per-batch ROC AUC
        start = time.time()
        for i in range(EPOCH_MAX * nb_batches):
            train_users, train_items, train_rates, train_wins, train_fails = next(
                iter_train)
            # NOTE(review): assigned but never read afterwards.
            batch_size = len(train_rates)
            _, train_logits, train_infer = sess.run(
                [train_op, logits, infer],
                feed_dict={
                    user_batch: train_users,
                    item_batch: train_items,
                    rate_batch: train_rates,
                    wins_batch: train_wins,
                    fails_batch: train_fails
                })
            #print('values', train_infer[42], train_logits[42], train_logits_cdf[42], ops.sigmoid(train_logits[42]), ops.sigmoid(train_logits_cdf[42]))
            # print(train_logits_cdf[42])
            # print(train_logits_pdf[42])
            # print(train_rates[42])
            if DISCRETE:
                if NB_CLASSES > 2:
                    # NOTE(review): this branch raises NameError if taken —
                    # `cost`, `logits_cdf` and `train_logits_cdf` come from
                    # the commented-out ops.inference_svd / ops.optimization
                    # variants above.
                    cost_batch = sess.run(cost,
                                          feed_dict={
                                              rate_batch: train_rates,
                                              item_batch: train_items,
                                              user_batch: train_users,
                                              logits_cdf: train_logits_cdf
                                          })
                    # print(train_users[42])
                    # print(train_items[42])
                    # print(train_logits_pdf[42])
                    # print(train_logits_cdf[42])
                    # print('thr', all_thresholds)
                    # print('infer', train_infer[42])
                    train_cost.append(cost_batch)
                    train_acc.append(train_infer == train_rates)
                    train_obo.append(abs(train_infer - train_rates) <= 1)
                    train_se.append(np.power(train_infer - train_rates, 2))
                else:
                    # Binary case: re-evaluate NLL by feeding the already
                    # computed logits back in (avoids a second forward pass).
                    nll_batch = sess.run(cost_nll,
                                         feed_dict={
                                             rate_batch: train_rates,
                                             logits: train_logits
                                         })
                    proba_batch = ops.sigmoid(train_logits)
                    train_acc.append(np.round(proba_batch) == train_rates)
                    train_auc.append(roc_auc_score(train_rates, proba_batch))
                    train_nll.append(nll_batch)
            else:
                # NOTE(review): `cost_l2` is only defined by the
                # commented-out ops.optimization call — NameError if taken.
                l2_batch = sess.run(cost_l2,
                                    feed_dict={
                                        rate_batch: train_rates,
                                        infer: train_infer
                                    })
                #print('est-ce', np.sum(np.power(train_rates - train_pred_batch, 2)))
                #print('que = ', l2_batch)
                #train_se.append(np.power(l2_batch, 2))
                train_se.append(np.power(train_rates - train_infer, 2))
            # Once per epoch: evaluate on the full test set and report.
            if i % nb_batches == 0:
                # Compute test error
                train_rmse = np.sqrt(np.mean(train_se))
                train_macc = np.mean(train_acc)
                train_mobo = np.mean(train_obo)
                train_mauc = np.mean(train_auc)
                # Per-example normalization — assumes cost_nll is summed
                # over the batch; TODO confirm against ops.optimization.
                train_mnll = np.mean(train_nll) / BATCH_SIZE
                train_mcost = np.mean(train_cost)
                test_se = []
                test_acc = []
                test_obo = []
                test_auc = 0
                test_nll = []
                test_cost = []
                for test_users, test_items, test_rates, test_wins, test_fails in iter_test:
                    test_logits, test_infer = sess.run(
                        [logits, infer],
                        feed_dict={
                            user_batch: test_users,
                            item_batch: test_items,
                            wins_batch: test_wins,
                            fails_batch: test_fails
                        })
                    # NOTE(review): assigned but never read afterwards.
                    test_size = len(test_rates)
                    # print(test_logits_cdf[42], test_logits_pdf[42])
                    # print(test_infer[42], test_rates[42])
                    if DISCRETE:
                        if NB_CLASSES > 2:
                            # NOTE(review): same undefined-`cost` issue as
                            # the training branch above.
                            cost_batch = sess.run(cost,
                                                  feed_dict={
                                                      rate_batch: test_rates,
                                                      item_batch: test_items,
                                                      user_batch: test_users
                                                  })
                            #print(cost_batch)
                            test_cost.append(cost_batch)
                            test_acc.append(test_infer == test_rates)
                            test_obo.append(abs(test_infer - test_rates) <= 1)
                            test_se.append(np.power(test_infer - test_rates, 2))
                        else:
                            #train_cost.append(cost_batch)
                            nll_batch = sess.run(cost_nll,
                                                 feed_dict={
                                                     rate_batch: test_rates,
                                                     logits: test_logits
                                                 })
                            proba_batch = ops.sigmoid(test_logits)
                            test_acc.append(
                                np.round(proba_batch) == test_rates)
                            # Overwritten each batch; only the last test
                            # batch's AUC is reported.
                            test_auc = roc_auc_score(test_rates, proba_batch)
                            # print(proba_batch[:5], test_rates[:5], test_auc)
                            test_nll.append(nll_batch)
                    else:
                        # NOTE(review): `rates` and `pred_batch` are
                        # undefined here (likely meant test_rates /
                        # test_infer) and `cost_l2` is commented out —
                        # NameError if this branch runs.
                        l2_batch = sess.run(cost_l2,
                                            feed_dict={
                                                rate_batch: rates,
                                                infer: pred_batch
                                            })
                        test_se.append(np.power(rates - pred_batch, 2))
                end = time.time()
                test_rmse = np.sqrt(np.mean(test_se))
                test_macc = np.mean(test_acc)
                test_mobo = np.mean(test_obo)
                test_mnll = np.mean(test_nll) / len(test)
                test_mcost = np.mean(test_cost)
                if DISCRETE:
                    if NB_CLASSES > 2:
                        print(
                            "{:3d} TRAIN(size={:d}/{:d}, macc={:f}, mobo={:f}, rmse={:f}, mcost={:f}) TEST(size={:d}, macc={:f}, mobo={:f}, rmse={:f}, mcost={:f}) {:f}(s)"
                            .format(i // nb_batches, len(train_users),
                                    len(train), train_macc, train_mobo,
                                    train_rmse, train_mcost, len(test),
                                    test_macc, test_mobo, test_rmse,
                                    test_mcost, end - start))
                    else:
                        print(
                            "{:3d} TRAIN(size={:d}/{:d}, macc={:f}, mauc={:f}, mnll={:f}) TEST(size={:d}, macc={:f}, auc={:f}, mnll={:f}) {:f}(s)"
                            .format(
                                i // nb_batches,
                                len(train_users),
                                len(train),
                                #train_rmse,  # rmse={:f}
                                train_macc,
                                train_mauc,
                                train_mnll,
                                len(test),
                                #test_rmse,  # rmse={:f}
                                test_macc,
                                test_auc,
                                test_mnll,
                                end - start))
                else:
                    print(
                        "{:3d} TRAIN(size={:d}/{:d}, rmse={:f}) TEST(size={:d}, rmse={:f}) {:f}(s)"
                        .format(
                            i // nb_batches,
                            len(train_users),
                            len(train),
                            train_rmse,  # rmse={:f}
                            #train_macc, train_mauc, train_mnll,
                            len(test),
                            test_rmse,  # rmse={:f}
                            #test_macc, test_mauc, test_mnll,
                            end - start))
                train_err_summary = make_scalar_summary(
                    "training_error", train_rmse)
                test_err_summary = make_scalar_summary("test_error",
                                                       test_rmse)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
        # print('thr', all_thresholds)
        # Save model
        print(os.path.join(BASE_DIR, 'fm.ckpt'))
        saver.save(sess, os.path.join(BASE_DIR, 'fm.ckpt'))
def svd_with_pipe(samples_per_batch, learning_rate=0.001, reg=0.05):
    """Train an SVD model reading batches through a TF input queue pipeline.

    Args:
        samples_per_batch: number of mini-batches per epoch (controls both
            the error window size and the reporting interval).
        learning_rate: optimizer step size; default keeps the historical
            hard-coded value.
        reg: regularization weight; default keeps the historical value.

    Relies on module-level BATCH_SIZE, DIM, EPOCH_MAX, USER_NUM, ITEM_NUM,
    DEVICE and the helpers `shuffleInputPipeline`, `ops`, `clip`.
    """
    # Both queues read the same ratings file; num_epochs=None cycles forever.
    trainfilequeue = tf.train.string_input_producer(
        ["/tmp/movielens/ml-1m/ratings.dat"], num_epochs=None, shuffle=False)
    testfilequeue = tf.train.string_input_producer(
        ["/tmp/movielens/ml-1m/ratings.dat"], num_epochs=None, shuffle=False)
    reader = tf.TextLineReader()
    user_batch, item_batch, rate_batch = shuffleInputPipeline(
        trainfilequeue, reader, BATCH_SIZE, 10)
    testuser_batch, testitem_batch, testrate_batch = shuffleInputPipeline(
        testfilequeue, reader, BATCH_SIZE, 10)
    infer, regularizer = ops.inference_svd(user_batch, item_batch,
                                           user_num=USER_NUM,
                                           item_num=ITEM_NUM, dim=DIM,
                                           device=DEVICE)
    # tf.train API, consistent with the other svd variant in this file
    # (tf.contrib.framework.* is deprecated).
    global_step = tf.train.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch,
                                   learning_rate=learning_rate, reg=reg,
                                   device=DEVICE)
    init_op = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init_op)
    # Queue runners feed the input pipeline on background threads.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # Fetch one fixed batch up front to serve as the held-out eval set.
    testusers, testitems, testrates = sess.run(
        [testuser_batch, testitem_batch, testrate_batch])
    # Rolling window of per-batch squared errors over the last epoch.
    errors = deque(maxlen=samples_per_batch)
    print("{} {} {} {}".format("epoch", "train_error", "val_error",
                               "elapsed_time"))
    try:
        # Fix: start the timer once before the loop (the original reset it
        # every iteration, which made the trailing `start = end` dead code
        # and reported per-iteration instead of per-epoch elapsed time).
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            # Dequeue a training batch, then feed it back explicitly so the
            # same tensors can be reused as placeholders for evaluation.
            users, items, rates = sess.run(
                [user_batch, item_batch, rate_batch])
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={
                                         user_batch: users,
                                         item_batch: items,
                                         rate_batch: rates
                                     })
            # Clamp predictions to the valid rating range.
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            # Report once per epoch.
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                pred_batch = sess.run(infer,
                                      feed_dict={
                                          user_batch: testusers,
                                          item_batch: testitems,
                                      })
                pred_batch = clip(pred_batch)
                test_err = np.sqrt(
                    np.mean(np.power(pred_batch - testrates, 2)))
                end = time.time()
                print("{:3d} {:f} {:f} {:f}(s)".format(
                    i // samples_per_batch, train_err, test_err,
                    end - start))
                start = end
    except tf.errors.OutOfRangeError:
        print('Done Training')
    finally:
        # Always stop the queue threads and release the session.
        coord.request_stop()
        coord.join(threads)
        sess.close()