Example #1
def svd(train, test, learning_rate=0.0005, reg=0.02, dim=50, batch_size=1000):
    samples_per_batch = len(train) // batch_size

    iter_train = dataio.ShuffleIterator([train["user"],
                                         train["item"],
                                         train["rate"]],
                                        batch_size=batch_size)

    iter_test = dataio.OneEpochIterator([test["user"],
                                         test["item"],
                                         test["rate"]],
                                        batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=dim,
                                           device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=learning_rate, reg=reg, device=DEVICE)

    pid = int(os.getpid())

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        min_test_err = 9999
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users,
                                                                   item_batch: items,
                                                                   rate_batch: rates})
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                min_test_err = min(test_err, min_test_err)
                print("{:5d} {:3d} {:f} {:f} {:f} {:f}(s)".format(pid, i // samples_per_batch, train_err, test_err, min_test_err,
                                                       end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
                sys.stdout.flush()
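
Every example on this page calls two small helpers, clip and make_scalar_summary, that are defined elsewhere in the source files. A minimal sketch of what they might look like, assuming a 1-5 rating scale and the TF 1.x summary protobuf:

import numpy as np
import tensorflow as tf

def clip(x):
    # Clamp predicted ratings to the assumed valid range (1.0 to 5.0).
    return np.clip(x, 1.0, 5.0)

def make_scalar_summary(name, val):
    # Wrap a Python float in a tf.Summary protobuf so it can be passed to
    # summary_writer.add_summary() and viewed in TensorBoard.
    return tf.Summary(value=[tf.Summary.Value(tag=name, simple_value=val)])
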
df_movies.iloc[0].title




# Network + train + test





samples_per_batch = len(df_train) // BATCH_SIZE

iter_train = dataio.ShuffleIterator([df_train["user"],
                                     df_train["item"],
                                    df_train["rate"]],
                                    batch_size=BATCH_SIZE)

iter_test = dataio.OneEpochIterator([df_test["user"],
                                     df_test["item"],
                                    df_test["rate"]],
                                    batch_size=-1)

user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
rate_batch = tf.placeholder(tf.float32, shape=[None])

infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
global_step = tf.contrib.framework.get_or_create_global_step()
_, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=0.001, reg=0.05, device=DEVICE)
def svd(train, test):
    samples_per_batch = len(train) // BATCH_SIZE

    iter_train = dataio.ShuffleIterator([train["user"],
                                         train["item"],
                                         train["rate"]],
                                        batch_size=BATCH_SIZE)

    iter_test = dataio.OneEpochIterator([test["user"],
                                         test["item"],
                                         test["rate"]],
                                        batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    item_content_batch = tf.placeholder(tf.float32, shape=[None,ITEM_CONTENT_DIM], name="content_item")
    user_content_batch = tf.placeholder(tf.float32, shape=[None,USER_CONTENT_DIM], name="content_user")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = cdl.inference(user_batch,user_content_batch,item_batch,item_content_batch,user_num=USER_NUM
                                       ,item_num=ITEM_NUM,dim=DIM
                                       ,item_autoencoder_input_dim=ITEM_CONTENT_DIM,item_autoencoder_hidden_dims=[50,DIM,50]
                                       ,user_autoencoder_input_dim=USER_CONTENT_DIM,user_autoencoder_hidden_dims=[30,DIM,30]
                                       ,device="/gpu:0")
    #infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM,
    #                                       device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=0.001, reg=0.05, device=DEVICE)

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            items_content = np.random.randn(BATCH_SIZE,ITEM_CONTENT_DIM).astype(np.float32)
            user_content = np.random.randn(BATCH_SIZE,USER_CONTENT_DIM).astype(np.float32)
            _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users,
                                                                   item_batch: items,
                                                                   rate_batch: rates,
                                                                   item_content_batch: items_content,
                                                                   user_content_batch: user_content
                                                                   })
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    items_content = np.random.randn(len(users),ITEM_CONTENT_DIM).astype(np.float32)
                    user_content = np.random.randn(len(users),USER_CONTENT_DIM).astype(np.float32)
                    pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items,
                                                            item_content_batch: items_content,
                                                            user_content_batch: user_content
                                                            })
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch, train_err, test_err,
                                                       end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
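
Note that this CDL-style variant feeds random noise into item_content_batch and user_content_batch. With real data those placeholders would carry side information about items and users. A hypothetical sketch for the item side, assuming df_movies has a pipe-separated "genres" column and a zero-based "item" id column (both column names are assumptions):

import numpy as np

# Hypothetical multi-hot genre matrix, one row per item id.
genre_dummies = df_movies["genres"].str.get_dummies(sep="|")
item_content = np.zeros((ITEM_NUM, genre_dummies.shape[1]), dtype=np.float32)
item_content[df_movies["item"].values] = genre_dummies.values
# During training, feed item_content[items] into item_content_batch instead of
# np.random.randn(...); ITEM_CONTENT_DIM would then equal genre_dummies.shape[1].
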
Example #4
def svd(train, test):
    samples_per_batch = len(train) // BATCH_SIZE

    iter_train = dataio.ShuffleIterator(
        [train["user"], train["item"], train["rate"]], batch_size=BATCH_SIZE)

    iter_test = dataio.OneEpochIterator(
        [test["user"], test["item"], test["rate"]], batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = ops.inference_svd(user_batch,
                                           item_batch,
                                           user_num=USER_NUM,
                                           item_num=ITEM_NUM,
                                           dim=DIM,
                                           device=DEVICE)
    _, train_op = ops.optimiaztion(infer,
                                   regularizer,
                                   rate_batch,
                                   learning_rate=0.15,
                                   reg=0.05,
                                   device=DEVICE)

    init_op = tf.initialize_all_variables()
    with tf.Session() as sess:
        sess.run(init_op)
        print("{} {} {} {}".format("epoch", "train_error", "val_error",
                                   "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer],
                                     feed_dict={
                                         user_batch: users,
                                         item_batch: items,
                                         rate_batch: rates
                                     })
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer,
                                          feed_dict={
                                              user_batch: users,
                                              item_batch: items
                                          })
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2,
                                          np.power(pred_batch - rates, 2))
                end = time.time()
                print("{:3d} {:f} {:f} {:f}(s)".format(
                    i // samples_per_batch, train_err,
                    np.sqrt(np.mean(test_err2)), end - start))
                start = end

        output_graph_def = tf.python.framework.graph_util.extract_sub_graph(
            sess.graph.as_graph_def(), ["svd_inference", "svd_regularizer"])
        tf.train.SummaryWriter(logdir="/tmp/svd", graph_def=output_graph_def)
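
All of these examples build their batches with dataio.ShuffleIterator and dataio.OneEpochIterator, which are imported but not shown on this page. A minimal sketch of how such iterators can be implemented (the original dataio module may differ in details):

import numpy as np

class ShuffleIterator(object):
    # Endless iterator: each next() returns a random batch drawn from the input columns.
    def __init__(self, inputs, batch_size=10):
        self.inputs = np.transpose(np.vstack([np.asarray(col) for col in inputs]))
        self.batch_size = batch_size
        self.num_cols = len(inputs)

    def __next__(self):
        ids = np.random.randint(0, len(self.inputs), (self.batch_size,))
        out = self.inputs[ids, :]
        return [out[:, i] for i in range(self.num_cols)]

    next = __next__  # Python 2 compatibility


class OneEpochIterator(object):
    # Single ordered pass over the data; batch_size=-1 yields everything in one batch.
    def __init__(self, inputs, batch_size=10):
        self.inputs = np.transpose(np.vstack([np.asarray(col) for col in inputs]))
        self.batch_size = len(self.inputs) if batch_size <= 0 else batch_size
        self.num_cols = len(inputs)

    def __iter__(self):
        for start in range(0, len(self.inputs), self.batch_size):
            out = self.inputs[start:start + self.batch_size, :]
            yield [out[:, i] for i in range(self.num_cols)]
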
Example #5
def svd(X_train, X_test, feedback_u, DIM, LAMBDA):
    'Main SVD code'

    # learning rate
    learning = LR

    # finding the number of batches in train data
    samples_per_batch = len(X_train) // BATCH_SIZE

    # initialize early-stopping parameters
    min_err = 100  # store minimum error
    counter = 0  # count number of times validation error was above minimum

    # build iterator objects for train and validation sets
    iter_train = dataio.ShuffleIterator(
        [X_train["user"], X_train["item"], X_train["rate"]],
        batch_size=BATCH_SIZE)

    iter_val = dataio.OneEpochIterator(
        [X_test["user"], X_test["item"], X_test["rate"]],
        batch_size=BATCH_SIZE)
    '''iter_test = dataio.OneEpochIterator([test["user"],
                                             test["item"],
                                             test["rate"]],
                                            batch_size=BATCH_SIZE)'''

    # start TensorFlow with an empty graph (needed when calling the svd function multiple times, e.g. for k-fold validation)
    with tf.Graph().as_default():

        # Define tensor placeholders (tensor objects that you feed into tensor functions)
        user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
        item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
        rate_batch = tf.placeholder(tf.float32, shape=[None])
        feedback_batch = tf.placeholder(tf.float32, shape=[None, ITEM_NUM])
        feedback_mat = tf.placeholder(tf.float32, shape=[USER_NUM, ITEM_NUM])

        infer, regularizer = ops.inference_svd(user_batch,
                                               item_batch,
                                               feedback_batch,
                                               user_num=USER_NUM,
                                               item_num=ITEM_NUM,
                                               dim=DIM,
                                               device=DEVICE)
        _, train_op = ops.optimiaztion(infer,
                                       regularizer,
                                       rate_batch,
                                       learning_rate=LR,
                                       reg=LAMBDA,
                                       device=DEVICE)

        full_ratings = ops.get_pred(feedback_mat, ITEM_NUM, USER_NUM, DIM,
                                    DEVICE)

        # Initialize all variables function
        init_op = tf.initialize_all_variables()

        # Start the tensorflow session
        with tf.Session() as sess:

            # initialize variables
            sess.run(init_op)

            print("{} {} {} {}".format("epoch", "train_error", "val_error",
                                       "elapsed_time"))
            errors = deque(maxlen=samples_per_batch)

            # Time each epoch
            start = time.time()

            # Iterate through epochs
            for i in range(EPOCH_MAX * samples_per_batch):

                # Generate batch data
                users, items, rates = next(iter_train)
                feedback = feedback_u[users.astype('int'), :]

                # Run the training functions
                _, pred_batch = sess.run(
                    [train_op, infer],
                    feed_dict={
                        user_batch: users,
                        item_batch: items,
                        rate_batch: rates,
                        feedback_batch: feedback
                    })
                pred_batch = clip(pred_batch)
                errors.append(np.power(pred_batch - rates, 2))

                # Do prediction on the validation set
                if i % samples_per_batch == 0:  #end of epoch
                    train_err = np.sqrt(np.mean(errors))  #train rmse
                    test_err2 = np.array([])  # test rmse

                    # predict validation set using iterator
                    for users, items, rates in iter_val:
                        feedback = feedback_u[users.astype('int'), :]
                        pred_batch = sess.run(infer,
                                              feed_dict={
                                                  user_batch: users,
                                                  item_batch: items,
                                                  feedback_batch: feedback
                                              })
                        pred_batch = clip(pred_batch)
                        test_err2 = np.append(test_err2,
                                              np.power(pred_batch - rates, 2))
                    end = time.time()  # end timer

                    # Validation error
                    RMSE_val = np.sqrt(np.mean(test_err2))
                    print("{:3d} {:f} {:f} {:f}(s)".format(
                        i // samples_per_batch, train_err, RMSE_val,
                        end - start))

                    start = end  #reset clock

                    # Early-stopping check: update the minimum-error variable if needed; if the validation
                    # error has not improved for 100 consecutive epochs, stop training
                    if min_err > RMSE_val:
                        min_err = RMSE_val
                        counter = 0
                        print('Min error updated')
                    else:
                        counter += 1

                    if counter >= 100:
                        break

            # Output log information
            output_graph_def = graph_util.extract_sub_graph(
                sess.graph.as_graph_def(),
                ["svd_inference", "svd_regularizer"])
            tf.train.SummaryWriter(logdir="/tmp/svd",
                                   graph_def=output_graph_def)
            ratings_mat = sess.run(full_ratings,
                                   feed_dict={feedback_mat: feedback_u})

    return min_err, clip(ratings_mat)
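
Example #5 above passes an implicit-feedback matrix feedback_u into the model (SVD++-style), but its construction is not shown. A hypothetical way to build it from the training ratings, with the usual 1/sqrt(|N(u)|) row normalization:

import numpy as np

def build_feedback(X_train, user_num, item_num):
    # Binary user-item indicator matrix, each row scaled by 1/sqrt(#items the user rated).
    fb = np.zeros((user_num, item_num), dtype=np.float32)
    fb[X_train["user"].astype(int), X_train["item"].astype(int)] = 1.0
    counts = np.maximum(fb.sum(axis=1, keepdims=True), 1.0)
    return fb / np.sqrt(counts)

feedback_u = build_feedback(X_train, USER_NUM, ITEM_NUM)
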
Example #6
def svd(train, test):

    # Number of batches per epoch in the training set
    samples_per_batch = len(train) // BATCH_SIZE

    # Turn the column data into row samples and shuffle them randomly
    iter_train = dataio.ShuffleIterator([train["user"],
                                         train["item"],
                                         train["rate"]],
                                        batch_size=BATCH_SIZE)
    print(iter_train)

    iter_test = dataio.OneEpochIterator([test["user"],
                                         test["item"],
                                         test["rate"]],
                                        batch_size=-1)
    print(iter_test)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])

    infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM,
                                           device=DEVICE)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=0.001, reg=0.05, device=DEVICE)

    # Initialize the variables
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)

        # Define the summary log directory; it must match the logdir passed to TensorBoard
        summary_writer = tf.summary.FileWriter(logdir="/log", graph=sess.graph)

        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            
            users, items, rates = next(iter_train)
            _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users,
                                                                   item_batch: items,
                                                                   rate_batch: rates})
            pred_batch = clip(pred_batch)
            # np.power: accumulate the squared error for this batch
            errors.append(np.power(pred_batch - rates, 2))
            
            # Report progress once per epoch
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                for users, items, rates in iter_test:
                    pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                            item_batch: items})
                    pred_batch = clip(pred_batch)
                    test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch, train_err, test_err,
                                                       end - start))
                train_err_summary = make_scalar_summary("training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
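
The heart of every example is ops.inference_svd and ops.optimization (spelled ops.optimiaztion in some forks), which are imported but never shown. A minimal TF 1.x sketch of a biased matrix-factorization version; the real ops module may differ in initialization, optimizer, and extra arguments:

import tensorflow as tf

def inference_svd(user_batch, item_batch, user_num, item_num, dim=5, device="/cpu:0"):
    with tf.device(device):
        global_bias = tf.get_variable("global_bias", shape=[])
        bias_user = tf.get_variable("bias_user", shape=[user_num])
        bias_item = tf.get_variable("bias_item", shape=[item_num])
        embd_user = tf.get_variable("embd_user", shape=[user_num, dim],
                                    initializer=tf.truncated_normal_initializer(stddev=0.02))
        embd_item = tf.get_variable("embd_item", shape=[item_num, dim],
                                    initializer=tf.truncated_normal_initializer(stddev=0.02))
        # Look up the latent factors and biases for this batch of ids.
        u = tf.nn.embedding_lookup(embd_user, user_batch)
        v = tf.nn.embedding_lookup(embd_item, item_batch)
        bu = tf.nn.embedding_lookup(bias_user, user_batch)
        bv = tf.nn.embedding_lookup(bias_item, item_batch)
        infer = tf.reduce_sum(tf.multiply(u, v), 1) + global_bias + bu + bv
        infer = tf.identity(infer, name="svd_inference")
        regularizer = tf.add(tf.nn.l2_loss(u), tf.nn.l2_loss(v), name="svd_regularizer")
    return infer, regularizer

def optimization(infer, regularizer, rate_batch, learning_rate=0.001, reg=0.05, device="/cpu:0"):
    with tf.device(device):
        cost_l2 = tf.nn.l2_loss(tf.subtract(infer, rate_batch))
        cost = tf.add(cost_l2, tf.multiply(regularizer, reg))
        train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    return cost, train_op
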
Example #7
def svd(train, test):
    nb_batches = len(train) // BATCH_SIZE

    iter_train = dataio.ShuffleIterator([
        train["user"], train["item"], train["outcome"], train["wins"],
        train["fails"]
    ],
                                        batch_size=BATCH_SIZE)

    iter_test = dataio.OneEpochIterator([
        test["user"], test["item"], test["outcome"], test["wins"],
        test["fails"]
    ],
                                        batch_size=-1)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    wins_batch = tf.placeholder(tf.float32, shape=[None], name="nb_wins")
    fails_batch = tf.placeholder(tf.float32, shape=[None], name="nb_fails")

    # infer, logits, logits_cdf, logits_pdf, regularizer, user_bias, user_features, item_bias, item_features, thresholds = ops.inference_svd(user_batch, item_batch, wins_batch, fails_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
    infer, logits, regularizer, user_bias, user_features, item_bias, item_features = ops.inference_svd(
        user_batch,
        item_batch,
        wins_batch,
        fails_batch,
        user_num=USER_NUM,
        item_num=ITEM_NUM,
        dim=DIM,
        device=DEVICE)
    global_step = tf.train.get_or_create_global_step()
    #cost_l2, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=LEARNING_RATE, reg=LAMBDA_REG, device=DEVICE)
    cost_nll, train_op = ops.optimization(infer,
                                          logits,
                                          regularizer,
                                          rate_batch,
                                          learning_rate=LEARNING_RATE,
                                          reg=LAMBDA_REG,
                                          device=DEVICE)
    #cost, train_op = ops.optimization(infer, logits, logits_cdf, logits_pdf, regularizer, rate_batch, learning_rate=LEARNING_RATE, reg=LAMBDA_REG, device=DEVICE)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="/tmp/svd/log",
                                               graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error",
                                   "elapsed_time"))
        train_se = deque(maxlen=nb_batches)
        train_nll = deque(maxlen=nb_batches)
        train_cost = deque(maxlen=nb_batches)
        train_acc = deque(maxlen=nb_batches)
        train_obo = deque(maxlen=nb_batches)
        train_auc = deque(maxlen=nb_batches)
        start = time.time()
        for i in range(EPOCH_MAX * nb_batches):
            train_users, train_items, train_rates, train_wins, train_fails = next(
                iter_train)
            batch_size = len(train_rates)

            _, train_logits, train_infer = sess.run(
                [train_op, logits, infer],
                feed_dict={
                    user_batch: train_users,
                    item_batch: train_items,
                    rate_batch: train_rates,
                    wins_batch: train_wins,
                    fails_batch: train_fails
                })
            #print('values', train_infer[42], train_logits[42], train_logits_cdf[42], ops.sigmoid(train_logits[42]), ops.sigmoid(train_logits_cdf[42]))

            # print(train_logits_cdf[42])
            # print(train_logits_pdf[42])
            # print(train_rates[42])

            if DISCRETE:
                if NB_CLASSES > 2:
                    cost_batch = sess.run(cost,
                                          feed_dict={
                                              rate_batch: train_rates,
                                              item_batch: train_items,
                                              user_batch: train_users,
                                              logits_cdf: train_logits_cdf
                                          })
                    # print(train_users[42])
                    # print(train_items[42])
                    # print(train_logits_pdf[42])
                    # print(train_logits_cdf[42])
                    # print('thr', all_thresholds)
                    # print('infer', train_infer[42])
                    train_cost.append(cost_batch)
                    train_acc.append(train_infer == train_rates)
                    train_obo.append(abs(train_infer - train_rates) <= 1)
                    train_se.append(np.power(train_infer - train_rates, 2))
                else:
                    nll_batch = sess.run(cost_nll,
                                         feed_dict={
                                             rate_batch: train_rates,
                                             logits: train_logits
                                         })
                    proba_batch = ops.sigmoid(train_logits)
                    train_acc.append(np.round(proba_batch) == train_rates)
                    train_auc.append(roc_auc_score(train_rates, proba_batch))
                    train_nll.append(nll_batch)
            else:
                l2_batch = sess.run(cost_l2,
                                    feed_dict={
                                        rate_batch: train_rates,
                                        infer: train_infer
                                    })
                #print('est-ce', np.sum(np.power(train_rates - train_pred_batch, 2)))
                #print('que = ', l2_batch)
                #train_se.append(np.power(l2_batch, 2))
                train_se.append(np.power(train_rates - train_infer, 2))

            if i % nb_batches == 0:
                # Compute test error
                train_rmse = np.sqrt(np.mean(train_se))
                train_macc = np.mean(train_acc)
                train_mobo = np.mean(train_obo)
                train_mauc = np.mean(train_auc)
                train_mnll = np.mean(train_nll) / BATCH_SIZE
                train_mcost = np.mean(train_cost)
                test_se = []
                test_acc = []
                test_obo = []
                test_auc = 0
                test_nll = []
                test_cost = []
                for test_users, test_items, test_rates, test_wins, test_fails in iter_test:
                    test_logits, test_infer = sess.run(
                        [logits, infer],
                        feed_dict={
                            user_batch: test_users,
                            item_batch: test_items,
                            wins_batch: test_wins,
                            fails_batch: test_fails
                        })
                    test_size = len(test_rates)

                    # print(test_logits_cdf[42], test_logits_pdf[42])
                    # print(test_infer[42], test_rates[42])

                    if DISCRETE:
                        if NB_CLASSES > 2:
                            cost_batch = sess.run(cost,
                                                  feed_dict={
                                                      rate_batch: test_rates,
                                                      item_batch: test_items,
                                                      user_batch: test_users
                                                  })
                            #print(cost_batch)
                            test_cost.append(cost_batch)
                            test_acc.append(test_infer == test_rates)
                            test_obo.append(abs(test_infer - test_rates) <= 1)
                            test_se.append(np.power(test_infer - test_rates,
                                                    2))
                        else:
                            #train_cost.append(cost_batch)
                            nll_batch = sess.run(cost_nll,
                                                 feed_dict={
                                                     rate_batch: test_rates,
                                                     logits: test_logits
                                                 })
                            proba_batch = ops.sigmoid(test_logits)
                            test_acc.append(
                                np.round(proba_batch) == test_rates)
                            test_auc = roc_auc_score(test_rates, proba_batch)
                            # print(proba_batch[:5], test_rates[:5], test_auc)
                            test_nll.append(nll_batch)
                    else:
                        l2_batch = sess.run(cost_l2,
                                            feed_dict={
                                                rate_batch: test_rates,
                                                infer: test_infer
                                            })
                        test_se.append(np.power(test_rates - test_infer, 2))

                end = time.time()
                test_rmse = np.sqrt(np.mean(test_se))
                test_macc = np.mean(test_acc)
                test_mobo = np.mean(test_obo)
                test_mnll = np.mean(test_nll) / len(test)
                test_mcost = np.mean(test_cost)
                if DISCRETE:
                    if NB_CLASSES > 2:
                        print(
                            "{:3d} TRAIN(size={:d}/{:d}, macc={:f}, mobo={:f}, rmse={:f}, mcost={:f}) TEST(size={:d}, macc={:f}, mobo={:f}, rmse={:f}, mcost={:f}) {:f}(s)"
                            .format(i // nb_batches, len(train_users),
                                    len(train), train_macc,
                                    train_mobo, train_rmse, train_mcost,
                                    len(test), test_macc, test_mobo, test_rmse,
                                    test_mcost, end - start))
                    else:
                        print(
                            "{:3d} TRAIN(size={:d}/{:d}, macc={:f}, mauc={:f}, mnll={:f}) TEST(size={:d}, macc={:f}, auc={:f}, mnll={:f}) {:f}(s)"
                            .format(
                                i // nb_batches,
                                len(train_users),
                                len(train),
                                #train_rmse, # rmse={:f}
                                train_macc,
                                train_mauc,
                                train_mnll,
                                len(test),
                                #test_rmse, # rmse={:f}
                                test_macc,
                                test_auc,
                                test_mnll,
                                end - start))
                else:
                    print(
                        "{:3d} TRAIN(size={:d}/{:d}, rmse={:f}) TEST(size={:d}, rmse={:f}) {:f}(s)"
                        .format(
                            i // nb_batches,
                            len(train_users),
                            len(train),
                            train_rmse,  # rmse={:f} 
                            #train_macc, train_mauc, train_mnll,
                            len(test),
                            test_rmse,  # rmse={:f} 
                            #test_macc, test_mauc, test_mnll,
                            end - start))
                train_err_summary = make_scalar_summary(
                    "training_error", train_rmse)
                test_err_summary = make_scalar_summary("test_error", test_rmse)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
        # print('thr', all_thresholds)

        # Save model
        print(os.path.join(BASE_DIR, 'fm.ckpt'))
        saver.save(sess, os.path.join(BASE_DIR, 'fm.ckpt'))
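
The svd function in Example #7 saves the trained weights to fm.ckpt via tf.train.Saver. A short sketch of restoring them later in a fresh session, assuming the same graph-construction code has been run first:

import os
import tensorflow as tf

# Rebuild the identical graph (placeholders, ops.inference_svd, ...) before restoring.
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, os.path.join(BASE_DIR, 'fm.ckpt'))  # BASE_DIR as in the example
    # The restored `infer` tensor can now be evaluated on new user/item batches
    # with sess.run(infer, feed_dict={...}).
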
def timesvdplusplus(train, test, binsize, ut_mean, maxtime):
    samples_per_batch = BATCH_SIZE

    iter_train = dataio.ShuffleIterator(
        [train["user"], train["item"], train["rate"], train["st"]],
        batch_size=BATCH_SIZE)

    iter_test = dataio.ShuffleIterator(
        [test["user"], test["item"], test["rate"], test["st"]],
        batch_size=BATCH_SIZE)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    time_batch = tf.placeholder(tf.int32, shape=[None])
    rmat_batch = tf.placeholder(tf.float32,
                                shape=[USER_NUM, ITEM_NUM],
                                name="rmat")
    tu_batch = tf.placeholder(tf.int32, shape=[None])
    # infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM,
    #                                        device=DEVICE)
    infer, regularizer = ops.inference_timesvdplusplus(user_batch,
                                                       item_batch,
                                                       time_batch,
                                                       rmat_batch,
                                                       tu_batch,
                                                       binsize,
                                                       maxtime,
                                                       user_num=USER_NUM,
                                                       item_num=ITEM_NUM,
                                                       batch_size=BATCH_SIZE,
                                                       dim=DIM)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer,
                                   regularizer,
                                   rate_batch,
                                   learning_rate=0.001,
                                   reg=0.05,
                                   device=DEVICE)

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error",
                                   "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates, times = next(iter_train)
            rmat = np.zeros([USER_NUM, ITEM_NUM], dtype=np.float32)
            rmat[users, items] = float(1.0)
            _, pred_batch = sess.run(
                [train_op, infer],
                feed_dict={
                    user_batch: users,
                    item_batch: items,
                    rate_batch: rates,
                    time_batch: times,
                    rmat_batch: rmat,
                    tu_batch: ut_mean[users],
                })
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            # print("i:{},errors:{}".format(i,np.sqrt(np.mean(errors))))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                users, items, rates, times = next(iter_test)
                rmat = np.zeros([USER_NUM, ITEM_NUM], dtype=np.float32)
                rmat[users, items] = float(1.0)
                # print("i:{},users:{},items:{}".format(i,users,items))
                pred_batch = sess.run(infer,
                                      feed_dict={
                                          user_batch: users,
                                          item_batch: items,
                                          time_batch: times,
                                          rmat_batch: rmat,
                                          tu_batch: ut_mean[users],
                                      })
                pred_batch = clip(pred_batch)
                test_err2 = np.append(test_err2,
                                      np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err,
                                                       end - start))
                train_err_summary = make_scalar_summary(
                    "training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
def svdplusplus(train, test):
    samples_per_batch = BATCH_SIZE

    iter_train = dataio.ShuffleIterator(
        [train["user"], train["item"], train["rate"]], batch_size=BATCH_SIZE)

    iter_test = dataio.ShuffleIterator(
        [test["user"], test["item"], test["rate"]], batch_size=BATCH_SIZE)

    user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
    item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
    rate_batch = tf.placeholder(tf.float32, shape=[None])
    rmat_batch = tf.placeholder(tf.float32,
                                shape=[USER_NUM, ITEM_NUM],
                                name="rmat")
    onecount_sqrt_batch = tf.placeholder(tf.float32,
                                         shape=[USER_NUM],
                                         name="onecount_sqrt")

    # infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM,
    #                                        device=DEVICE)
    infer, regularizer = ops.inference_svdplusplus(user_batch,
                                                   item_batch,
                                                   rmat_batch,
                                                   user_num=USER_NUM,
                                                   item_num=ITEM_NUM,
                                                   batch_size=BATCH_SIZE,
                                                   dim=DIM)
    global_step = tf.contrib.framework.get_or_create_global_step()
    _, train_op = ops.optimization(infer,
                                   regularizer,
                                   rate_batch,
                                   learning_rate=0.001,
                                   reg=0.05,
                                   device=DEVICE)

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error",
                                   "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()
        for i in range(EPOCH_MAX * samples_per_batch):
            users, items, rates = next(iter_train)
            rvalues = train.values  # .pivot(index='user', columns='item', values='rate').values  # convert the pandas DataFrame to numpy arrays
            rmat = np.zeros((USER_NUM, ITEM_NUM), dtype=float)
            rows, row_pos = np.unique(rvalues[:, 1], return_inverse=True)
            cols, col_pos = np.unique(rvalues[:, 2], return_inverse=True)
            rmat[row_pos, col_pos] = rvalues[:, 3]
            #rmat = rmat[users[:,None], items]
            _, pred_batch = sess.run(
                [train_op, infer],
                feed_dict={
                    user_batch: users,
                    item_batch: items,
                    rate_batch: rates,
                    rmat_batch: rmat,
                })
            pred_batch = clip(pred_batch)
            errors.append(np.power(pred_batch - rates, 2))
            # print("i:{},errors:{}".format(i,np.sqrt(np.mean(errors))))
            if i % samples_per_batch == 0:
                train_err = np.sqrt(np.mean(errors))
                test_err2 = np.array([])
                users, items, rates = next(iter_test)
                # print("i:{},users:{},items:{}".format(i,users,items))
                pred_batch = sess.run(infer,
                                      feed_dict={
                                          user_batch: users,
                                          item_batch: items,
                                          rmat_batch: rmat,
                                      })
                pred_batch = clip(pred_batch)
                test_err2 = np.append(test_err2,
                                      np.power(pred_batch - rates, 2))
                end = time.time()
                test_err = np.sqrt(np.mean(test_err2))
                print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch,
                                                       train_err, test_err,
                                                       end - start))
                train_err_summary = make_scalar_summary(
                    "training_error", train_err)
                test_err_summary = make_scalar_summary("test_error", test_err)
                summary_writer.add_summary(train_err_summary, i)
                summary_writer.add_summary(test_err_summary, i)
                start = end
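
None of the snippets above shows how the svd(train, test) functions are invoked. A hypothetical driver, assuming a MovieLens-style ratings file with 1-based ids and the column names used throughout this page:

import numpy as np
import pandas as pd

# Hypothetical driver; file name, separator, and split ratio are assumptions.
df = pd.read_csv("ratings.dat", sep="::", engine="python",
                 names=["user", "item", "rate", "st"])
df["user"] -= 1  # zero-based ids, matching the embedding-table lookups above
df["item"] -= 1
rows = np.random.permutation(len(df))
df = df.iloc[rows].reset_index(drop=True)  # shuffle before splitting
split_index = int(0.9 * len(df))
df_train, df_test = df[:split_index].copy(), df[split_index:].copy()
svd(df_train, df_test)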