Ejemplo n.º 1
0
def _clipped_batch_mse(pred_batch, true_ratings):
    """Return the mean squared error of one batch.

    Predictions are clipped to the [1.0, 5.0] interval before they are
    compared with ``true_ratings``.
    """
    clipped = np.clip(pred_batch, 1.0, 5.0)
    return np.mean(np.power(clipped - true_ratings, 2))


def _test_batch_errors(sess, prediction, user_batch, movie_batch, num_batches,
                       report_every=None):
    """Evaluate ``prediction`` on consecutive slices of the global ``test_data``.

    Args:
        sess: the session used to run the graph.
        prediction: tensor produced by ``CollabFilterring``.
        user_batch: tensor fed with column 0 of each test slice.
        movie_batch: tensor fed with column 1 of each test slice.
        num_batches: number of ``TS_BATCH_SIZE``-row slices to evaluate.
        report_every: when set, print a progress percentage every
            ``report_every`` batches.

    Returns:
        A list holding one clipped MSE per evaluated batch (column 2 of the
        slice is the reference value).
    """
    batch_errors = []
    for i in range(num_batches):
        batch = test_data[i * TS_BATCH_SIZE:(i + 1) * TS_BATCH_SIZE]
        pred_batch = sess.run(
            prediction,
            feed_dict={user_batch: batch[:, 0], movie_batch: batch[:, 1]})
        batch_errors.append(_clipped_batch_mse(pred_batch, batch[:, 2]))
        if report_every and (i + 1) % report_every == 0:
            per = float(i + 1) / num_batches * 100
            print(str(per) + "% Completed")
    return batch_errors


def train_nn(user_batch, movie_batch, rating_batch):
    """Train the collaborative-filtering graph and report train/test RMSE.

    Builds the loss (L2 loss of ``prediction - rating_batch`` plus the
    regularisation term returned by ``CollabFilterring``), minimises it with
    Adam for ``N_EPOCHS`` epochs over the global ``train_data``, evaluates on
    the global ``test_data`` after every epoch and once more at the end,
    saves a checkpoint named ``'gen-model'`` and plots RMSE against epoch.

    Args:
        user_batch: tensor fed with column 0 of each data slice
            (presumably a ``tf.placeholder`` -- confirm at the call site).
        movie_batch: tensor fed with column 1 of each data slice.
        rating_batch: tensor fed with column 2 of each training slice.

    Returns:
        None. Results are printed, plotted and written to the checkpoint.

    Notes:
        * ``train_data`` is shuffled in place at the start of every epoch.
        * Only ``int(NUM_TR_ROW / BATCH_SIZE)`` (resp. ``NUM_TS_ROW`` /
          ``TS_BATCH_SIZE``) full slices are visited; any trailing rows
          beyond that are not used.
    """
    prediction, cost_reg = CollabFilterring(user_batch, movie_batch)
    cost_l2 = tf.nn.l2_loss(tf.subtract(prediction, rating_batch))
    cost = tf.add(cost_l2, cost_reg)

    # default learning rate = 0.001
    optimizer = tf.train.AdamOptimizer(
        learning_rate=LEARNING_RATE).minimize(cost)
    saver = tf.train.Saver()

    # Both counts are loop invariants; computing them up front also keeps the
    # final test pass correct when no epoch runs at all.
    num_train_batches = int(NUM_TR_ROW / BATCH_SIZE)
    num_test_batches = int(NUM_TS_ROW / TS_BATCH_SIZE)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        RMSEtr = []
        RMSEts = []
        for epoch in range(N_EPOCHS):
            stime = time.time()
            np.random.shuffle(train_data)

            train_errors = []
            for i in range(num_train_batches):
                batch = train_data[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
                _, pred_batch = sess.run(
                    [optimizer, prediction],
                    feed_dict={
                        user_batch: batch[:, 0],
                        movie_batch: batch[:, 1],
                        rating_batch: batch[:, 2]
                    })
                train_errors.append(
                    _clipped_batch_mse(pred_batch, batch[:, 2]))

            # RMSE over the epoch = sqrt of the mean of the per-batch MSEs.
            TR_epoch_loss = np.sqrt(np.mean(train_errors))
            RMSEtr.append(TR_epoch_loss)

            test_errors = _test_batch_errors(
                sess, prediction, user_batch, movie_batch, num_test_batches)
            TS_epoch_loss = np.sqrt(np.mean(test_errors))
            RMSEts.append(TS_epoch_loss)

            ftime = time.time()
            # Remaining time estimate: this epoch's duration times epochs left.
            remtime = (N_EPOCHS - epoch - 1) * (ftime - stime)
            print("Epoch" + str(epoch + 1) + " completed out of " +
                  str(N_EPOCHS) + "; Train loss:" +
                  str(round(TR_epoch_loss, 3)) + "; Test loss:" +
                  str(round(TS_epoch_loss, 3)))
            printTime(remtime)

        print("Computing Final Test Loss...")

        final_errors = _test_batch_errors(
            sess, prediction, user_batch, movie_batch, num_test_batches,
            report_every=50)
        test_loss = np.sqrt(np.mean(final_errors))
        print("Test Loss:" + str(round(test_loss, 3)))

        # Persist the model before plotting: plt.show() may block until the
        # window is closed, and the checkpoint should not depend on that.
        saver.save(sess, 'gen-model')

        # The first training RMSE is far larger than the rest (around 2.16 per
        # the original author's note) and would flatten the plot, so the
        # plotted training curve starts from the first test RMSE instead.
        # A copy is used so the recorded history stays untouched, and the
        # slicing form also works when no epoch was run.
        plot_tr = RMSEts[:1] + RMSEtr[1:]
        plt.plot(plot_tr, label='Training Set', color='b')
        plt.plot(RMSEts, label='Test Set', color='r')
        plt.legend()
        plt.ylabel('-----  RMSE  ---->')
        plt.xlabel('-----  Epoch  ---->')
        plt.title('RMSE vs Epoch (Biased Matrix Factorization)')
        plt.show()
        print("Awesome !!")
Ejemplo n.º 2
0
    # 	data[j,3]= np.sum(score[i]/np.sum(score[i])*genre[int(data[j,1])])
    # data[:,3] = -data[:,3]
    # data = data[data[:,3].argsort()]
    # data[:,3] = -data[:,3]
    # Body of a per-user loop whose header (and the definitions of `data`,
    # `userId`, `movId`, `user38`, `stime`) lies outside this view.
    #
    # Take the first five rows and first three columns of `data` as this
    # iteration's recommendations.
    # NOTE(review): if `data` is a NumPy array this slice is a view, so the
    # writes below also modify `data` -- confirm that is intended.
    top5 = data[0:5, 0:3]

    # Column 0 is overwritten with the value stored at userId[i].
    top5[:, 0] = userId[i]
    # Column 1 is translated through the `movId` lookup, one row at a time.
    # NOTE(review): top5[j, 1] is used directly as the key/index; if top5 is
    # a float array and movId is a list/ndarray an int cast would be needed
    # -- verify against the definition of movId.
    for j in range(0, 5):
        top5[j, 1] = movId[top5[j, 1]]
    # Hard-coded special case: for i == 38, columns 1 onward are replaced by
    # `user38` before the rounding/clipping below is applied.
    if i == 38:
        top5[:, 1:] = user38
    # Round column 2 to the nearest 0.5, then clamp it to [3.5, 5.0].
    top5[:, 2] = np.around(top5[:, 2] * 2) / 2
    top5[:, 2] = np.clip(top5[:, 2], 3.5, 5.0)
    # Accumulate the rows across iterations; the i == 0 iteration seeds
    # `recomm`, later iterations stack their five rows underneath.
    if i == 0:
        recomm = top5
    else:
        recomm = np.vstack((recomm, top5))
    ftime = time.time()
    # Remaining-time estimate: time elapsed since `stime` (presumably taken at
    # the start of this iteration -- TODO confirm) times the iterations left.
    remtime = (ftime - stime) * (NUM_USER - i - 1)

    printTime(remtime)

# Final export of the stacked recommendation rows. Per the disabled DataFrame
# export below, the three columns are userId, movieId and rating.
recomm = np.array(recomm)
# Truncate the id columns to whole numbers. The array keeps its own dtype, so
# on a float array the ids are stored back as floats; the integer look in the
# CSV comes from the explicit `fmt` passed to np.savetxt below.
recomm[:, 0:2] = recomm[:, 0:2].astype('int')
# recomm = pd.DataFrame(recomm,columns=['userId','movieId','rating'])
# cols = ['userId','movieId']
# recomm[cols] = recomm[cols].applymap(np.int64)
# recomm.to_csv('solution.csv',index=False)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(recomm[0:20])
# Without `fmt`, savetxt writes every value as '%.18e' (e.g.
# 1.000000000000000000e+00), ids included. Ids are written as integers and
# ratings with one decimal, which is lossless because the ratings were rounded
# to steps of 0.5 before being stacked.
np.savetxt('solution.csv', recomm, delimiter=",", fmt=['%d', '%d', '%.1f'])