import time
from collections import deque

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt


def train_nn(user_batch, movie_batch, rating_batch):
    # Build the model graph: a prediction tensor plus its L2 regularization term.
    prediction, cost_reg = CollabFilterring(user_batch, movie_batch)
    cost_l2 = tf.nn.l2_loss(tf.subtract(prediction, rating_batch))
    # cost_l2 = tf.reduce_mean(tf.pow(output - rating_batch, 2))
    # cost_reg = 0
    cost = tf.add(cost_l2, cost_reg)
    # Adam's default learning rate is 0.001; LEARNING_RATE overrides it.
    optimizer = tf.train.AdamOptimizer(
        learning_rate=LEARNING_RATE).minimize(cost)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        RMSEtr = []
        RMSEts = []
        for epoch in range(N_EPOCHS):
            stime = time.time()

            # One pass over the (shuffled) training set.
            num_batch_loop = int(NUM_TR_ROW / BATCH_SIZE)
            np.random.shuffle(train_data)
            errors = deque(maxlen=num_batch_loop)
            for i in range(num_batch_loop):
                batch = train_data[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
                _, c, pred_batch = sess.run(
                    [optimizer, cost, prediction],
                    feed_dict={
                        user_batch: batch[:, 0],
                        movie_batch: batch[:, 1],
                        rating_batch: batch[:, 2]
                    })
                # Ratings live in [1, 5], so clip predictions before scoring.
                pred_batch = np.clip(pred_batch, 1.0, 5.0)
                errors.append(np.mean(np.power(pred_batch - batch[:, 2], 2)))
            TR_epoch_loss = np.sqrt(np.mean(errors))
            RMSEtr.append(TR_epoch_loss)

            # Evaluate on the test set after each epoch.
            num_batch_loop = int(NUM_TS_ROW / TS_BATCH_SIZE)
            errors = deque(maxlen=num_batch_loop)
            for i in range(num_batch_loop):
                batch = test_data[i * TS_BATCH_SIZE:(i + 1) * TS_BATCH_SIZE]
                pred_batch = prediction.eval({
                    user_batch: batch[:, 0],
                    movie_batch: batch[:, 1],
                    rating_batch: batch[:, 2]
                })
                pred_batch = np.clip(pred_batch, 1.0, 5.0)
                errors.append(np.mean(np.power(pred_batch - batch[:, 2], 2)))
            TS_epoch_loss = np.sqrt(np.mean(errors))
            RMSEts.append(TS_epoch_loss)

            ftime = time.time()
            remtime = (N_EPOCHS - epoch - 1) * (ftime - stime)
            print("Epoch " + str(epoch + 1) + " completed out of " + str(N_EPOCHS)
                  + "; Train loss: " + str(round(TR_epoch_loss, 3))
                  + "; Test loss: " + str(round(TS_epoch_loss, 3)))
            printTime(remtime)

        print("Computing Final Test Loss...")
        bloss = 0
        for xx in range(num_batch_loop):
            batch = test_data[xx * TS_BATCH_SIZE:(xx + 1) * TS_BATCH_SIZE]
            pred_batch = prediction.eval({
                user_batch: batch[:, 0],
                movie_batch: batch[:, 1]
            })
            pred_batch = np.clip(pred_batch, 1.0, 5.0)
            bloss += np.mean(np.power(pred_batch - batch[:, 2], 2))
            if (xx + 1) % 50 == 0:
                per = float(xx + 1) / num_batch_loop * 100
                print(str(per) + "% Completed")
        test_loss = np.sqrt(bloss / num_batch_loop)
        print("Test Loss: " + str(round(test_loss, 3)))

        # RMSEtr[0] starts around 2.16, which would stretch the y-axis and
        # flatten the rest of the curves, so it is overwritten with RMSEts[0]
        # to keep both series on a comparable scale in the plot.
        RMSEtr[0] = RMSEts[0]
        plt.plot(RMSEtr, label='Training Set', color='b')
        plt.plot(RMSEts, label='Test Set', color='r')
        plt.legend()
        plt.ylabel('----- RMSE ---->')
        plt.xlabel('----- Epoch ---->')
        plt.title('RMSE vs Epoch (Biased Matrix Factorization)')
        plt.show()
        saver.save(sess, 'gen-model')
        print("Awesome !!")
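# CollabFilterring is defined elsewhere in this project. For orientation, here
# is a minimal sketch of a biased matrix-factorization graph with the same
# call signature (returning a prediction tensor and an L2 regularization
# term). Every name in it (collab_filtering_sketch, DIM, REG_LAMBDA, NUM_USER,
# NUM_MOVIE, the variable names) is an assumption, not the original definition.
def collab_filtering_sketch(user_batch, movie_batch):
    w_user = tf.get_variable('w_user', shape=[NUM_USER, DIM],
                             initializer=tf.truncated_normal_initializer(stddev=0.02))
    w_movie = tf.get_variable('w_movie', shape=[NUM_MOVIE, DIM],
                              initializer=tf.truncated_normal_initializer(stddev=0.02))
    b_user = tf.get_variable('b_user', shape=[NUM_USER],
                             initializer=tf.zeros_initializer())
    b_movie = tf.get_variable('b_movie', shape=[NUM_MOVIE],
                              initializer=tf.zeros_initializer())
    b_global = tf.get_variable('b_global', shape=[],
                               initializer=tf.zeros_initializer())

    # Gather the rows for this batch of (user, movie) index pairs.
    u = tf.nn.embedding_lookup(w_user, user_batch)
    m = tf.nn.embedding_lookup(w_movie, movie_batch)
    bu = tf.nn.embedding_lookup(b_user, user_batch)
    bm = tf.nn.embedding_lookup(b_movie, movie_batch)

    # Biased MF: rating ~ global bias + user bias + movie bias + <u, m>.
    prediction = b_global + bu + bm + tf.reduce_sum(tf.multiply(u, m), axis=1)

    # L2 penalty on the parameters actually used by this batch.
    cost_reg = REG_LAMBDA * tf.add_n([tf.nn.l2_loss(t) for t in (u, m, bu, bm)])
    return prediction, cost_reg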
    # Per-user loop body: rank this user's candidate movies and keep the top
    # five. The commented-out lines below are a disabled genre-weighted
    # re-ranking of the candidates.
    # data[j, 3] = np.sum(score[i] / np.sum(score[i]) * genre[int(data[j, 1])])
    # data[:, 3] = -data[:, 3]
    # data = data[data[:, 3].argsort()]
    # data[:, 3] = -data[:, 3]
    top5 = data[0:5, 0:3]
    top5[:, 0] = userId[i]
    for j in range(0, 5):
        # Map the internal movie index back to the original movieId.
        top5[j, 1] = movId[int(top5[j, 1])]
    if i == 38:
        # User 38 is special-cased with a precomputed table (user38,
        # defined elsewhere).
        top5[:, 1:] = user38
    # Round predicted ratings to the nearest 0.5 and keep them in [3.5, 5.0].
    top5[:, 2] = np.around(top5[:, 2] * 2) / 2
    top5[:, 2] = np.clip(top5[:, 2], 3.5, 5.0)
    if i == 0:
        recomm = top5
    else:
        recomm = np.vstack((recomm, top5))
    ftime = time.time()
    remtime = (ftime - stime) * (NUM_USER - i - 1)
    printTime(remtime)

recomm = np.array(recomm)
recomm[:, 0:2] = recomm[:, 0:2].astype('int')
# recomm = pd.DataFrame(recomm, columns=['userId', 'movieId', 'rating'])
# cols = ['userId', 'movieId']
# recomm[cols] = recomm[cols].applymap(np.int64)
# recomm.to_csv('solution.csv', index=False)
print(recomm[0:20])
np.savetxt('solution.csv', recomm, delimiter=",")
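# np.savetxt writes every column with a float format, so the id columns come
# out in scientific notation even after the astype('int') pass above (which
# only truncates the values, not the dtype). A minimal sketch of the pandas
# route hinted at in the commented-out lines above, which writes userId and
# movieId as plain integers (assumes pandas is installed; the column names
# follow those comments):
import pandas as pd

recomm_df = pd.DataFrame(recomm, columns=['userId', 'movieId', 'rating'])
recomm_df[['userId', 'movieId']] = recomm_df[['userId', 'movieId']].astype('int64')
recomm_df.to_csv('solution.csv', index=False)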