users = separated_ratings.userId.unique() movies = separated_ratings.movieId.unique() rating = separated_ratings.rating.unique() print("Users: ", len(users), ", Movies: ", len(movies), ", Ratings: ", len(rating)) userid2idx = {o: i for i, o in enumerate(users)} movieid2idx = {o: i for i, o in enumerate(movies)} movieidx2id = {i: o for i, o in enumerate(movies)} new_ratings = separated_ratings.copy() new_ratings.movieId = separated_ratings.movieId.apply(lambda x: movieid2idx[x]) new_ratings.userId = separated_ratings.userId.apply(lambda x: userid2idx[x]) ## split data into training and validation set, 80% in training, 20% in validation msk = np.random.rand(len(new_ratings)) < 0.8 train = new_ratings[msk] valid = new_ratings[~msk] print("training data: ", train.shape, ",validation data: ", valid.shape) #neural_net.baseline(train, valid, users, movies) #neural_net.multilayer_LSTM(train, valid, users, movies) predicted_ratings = neural_net.bidirectional_LSTM(train, valid, users, movies, separated_sample_submission) #neural_net.bidirectional_LSTM_2(train, valid, users, movies) reader_writer.write_to_file(predicted_ratings, 'sample_submission.csv', 'predicted_ratings.csv')