        # Return the top-k movies: pick the k highest-scored candidates and
        # map their inner ids back to raw movie ids.
        movie_ids = [
            full_dataset.to_raw_iid(i)
            for i in heapq.nlargest(k, candidates, key=candidates.get)
        ]
        return self.movies.get_movie_by_movie_ids(movie_ids)


if __name__ == '__main__':
    from movies_recommender.Recommender import test_recommendation
    from movies_recommender.RecommenderUserBased import RecommenderUserBased
    from movies_analyzer.RecommendationDataset import RecommendationDataSet
    from movies_analyzer.Movies import Movies

    movies = Movies()
    recommendation_dataset = RecommendationDataSet(movies=movies)
    recommender = RecommenderUserBased(movies)
    # Sanity check: the recommender must come from the movies_recommender package.
    assert recommender.__module__.startswith('movies_recommender.')

    test_recommendation(recommender, recommendation_dataset,
                        example_items=['arek', 'mateusz'], anti_test=False)

""" For test only
%load_ext autoreload
%autoreload 2

from filmweb_integrator.fwimdbmerge.filmweb import Filmweb
from filmweb_integrator.fwimdbmerge.merger import Merger, get_json_df
from tqdm import tqdm
from movies_analyzer.RecommendationDataset import RecommendationDataSet
from movies_analyzer.Movies import Movies
from movies_analyzer.Imdb import get_imdb_movie

# Download all movie images from IMDb based on the MovieLens index. This is a
# very long-running job: roughly 6 hours for 9,000 movies, and about 2 weeks
# for the whole of IMDb (~500,000 movies).
recommendation_dataset = RecommendationDataSet(movies=Movies())

# The index holds IMDb ids with a 'tt' prefix, which get_imdb_movie expects
# to be stripped.
for imdb_id in tqdm(recommendation_dataset.movies.data.index):
    get_imdb_movie(imdb_id.replace('tt', ''))
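# A hedged sketch, not part of the original script: for a job that can run for
# hours or weeks it may help to tolerate individual failures, so that one bad
# fetch does not abort the whole download. get_imdb_movie, tqdm and
# recommendation_dataset are from the script above; the logging-based error
# handling is an assumption added for illustration.
import logging

for imdb_id in tqdm(recommendation_dataset.movies.data.index):
    try:
        get_imdb_movie(imdb_id.replace('tt', ''))
    except Exception:
        # Record the failing id and continue with the remaining movies.
        logging.exception('could not fetch %s', imdb_id)
"""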