from tqdm import tqdm
from movies_analyzer.RecommendationDataset import RecommendationDataSet
from movies_analyzer.Movies import Movies
from movies_analyzer.Imdb import get_imdb_movie

# Download all images from IMDb based on MovieLens. This is a very long query:
# about 6 hours for 9000 movies, and about 2 weeks for all of IMDb (500,000).
recommendation_dataset = RecommendationDataSet(movies=Movies())

# All IMDb movies: walk the index of the movies table, with a progress bar.
movie_index = recommendation_dataset.movies.data.index
for raw_id in tqdm(movie_index):
    # Every 'tt' is removed from the index value before it is handed to
    # get_imdb_movie.
    get_imdb_movie(raw_id.replace('tt', ''))
# NOTE(review): the line below is a whitespace-collapsed fragment that is cut
# off at both ends; because it starts with '#', Python reads all of it as one
# comment. It is kept verbatim. Contents, in order:
#   1. Tail of a method whose `def` line is not visible here: takes the `k`
#      keys of `candidates` with the largest values (heapq.nlargest with
#      key=candidates.get), converts each one with `full_dataset.to_raw_iid`,
#      and returns `self.movies.get_movie_by_movie_ids(movie_ids)`.
#   2. An `if __name__ == '__main__':` run: builds Movies,
#      RecommendationDataSet and RecommenderUserBased, asserts that the
#      recommender's `__module__` starts with 'movies_recommender.', then
#      calls test_recommendation with example_items=['arek', 'mateusz'] and
#      anti_test=False.
#   3. The opening of a triple-quoted "For test only" note (autoreload magics
#      and filmweb_integrator imports) that is not closed on this line.
# return top-n movies movie_ids = [ full_dataset.to_raw_iid(i) for i in heapq.nlargest(k, candidates, key=candidates.get) ] return self.movies.get_movie_by_movie_ids(movie_ids) if __name__ == '__main__': from movies_recommender.Recommender import test_recommendation from movies_recommender.RecommenderUserBased import RecommenderUserBased from movies_analyzer.RecommendationDataset import RecommendationDataSet from movies_analyzer.Movies import Movies movies = Movies() recommendation_dataset = RecommendationDataSet(movies=movies) recommender = RecommenderUserBased(movies) assert recommender.__module__[:len('movies_recommender.' )] == 'movies_recommender.' test_recommendation(recommender, recommendation_dataset, example_items=['arek', 'mateusz'], anti_test=False) """ For test only %load_ext autoreload %autoreload 2 from filmweb_integrator.fwimdbmerge.filmweb import Filmweb from filmweb_integrator.fwimdbmerge.merger import Merger, get_json_df
# Keep the JSON and CSV copies of the Filmweb example data in sync: load the
# one selected by EXAMPLE_USE_JSON and regenerate the other from it.
if EXAMPLE_USE_JSON:
    # Log the file that is actually read on the next line (the JSON one).
    logger.warning(f"Load json file: {DATA_STATIC}/{FILMWEB_EXAMPLE_JSON}")
    df = get_json_df(read_file(FILMWEB_EXAMPLE_JSON))
    logger.warning(f"Write to csv ({len(df)})")
    # NOTE(review): to_csv writes the index column by default, so the
    # read_csv in the other branch will see it as an extra unnamed column.
    # Confirm this is intended.
    df.to_csv(FILMWEB_EXAMPLE_CSV)
else:
    logger.warning(f"Load csv file: {DATA_STATIC}/{FILMWEB_EXAMPLE_CSV}")
    df = pd.read_csv(FILMWEB_EXAMPLE_CSV)
    logger.warning(f"Write to json ({len(df)})")
    with open(FILMWEB_EXAMPLE_JSON, "w", encoding="utf-8") as file_wr:
        df.to_json(file_wr, orient='records')

# Merge the Filmweb rows with IMDb; get_data returns a pair of frames that
# are saved separately below.
logger.warning("Start merging")
movies = Movies()
merger = Merger(imdb=movies.imdb)
filmweb_df, merge_df = merger.get_data(df)

logger.warning(
    f"Save merge.csv Filmweb({len(filmweb_df)}) -> IMDB({len(merge_df)})")
# The Filmweb frame is saved without its index; the merged frame keeps it.
filmweb_df.to_csv(FILMWEB_EXAMPLE_FILMWEB, index=False)
merge_df.to_csv(FILMWEB_EXAMPLE_MERGE, index=True)

# Second stage: merge the IMDb result with MovieLens and save it.
movielens_df = movies.merge_imdb_movielens(merge_df)
logger.warning(
    f"Save movielens merge IMDB({len(merge_df)}) -> MOVIELENS({len(movielens_df)})"
)
movielens_df.to_csv(FILMWEB_EXAMPLE_MOVIELENS, index=True)

logger.warning(f"Print data ({len(movielens_df)})")