from tqdm import tqdm
from movies_analyzer.RecommendationDataset import RecommendationDataSet
from movies_analyzer.Movies import Movies
from movies_analyzer.Imdb import get_imdb_movie

# Download all images from IMDb based on the MovieLens index; this is a very long-running job:
# for 9,000 movies it could take about 6 hours; for all of IMDb (~500,000 titles) about 2 weeks.
recommendation_dataset = RecommendationDataSet(movies=Movies())

# MovieLens indexes movies by IMDb id in the 'tt1234567' form, while
# get_imdb_movie expects only the numeric part — strip the 'tt' prefix.
# (Original code called this id "tmbdid", a misspelling: these are IMDb
# ids, not TMDB ids.)
for imdb_id in tqdm(recommendation_dataset.movies.data.index):
    get_imdb_movie(imdb_id.replace('tt', ''))
# --- Esempio n. 2 --- (scraped-example separator; the stray vote-count "0" from the scrape was folded into this comment)
        # Return the top-k movies: heapq.nlargest picks the k best-scoring
        # candidate ids without sorting the whole mapping (candidates
        # presumably maps surprise inner ids -> predicted score — confirm
        # against the enclosing recommender class, which is above this view).
        movie_ids = [
            full_dataset.to_raw_iid(i)
            for i in heapq.nlargest(k, candidates, key=candidates.get)
        ]

        # Resolve the raw movie ids to full movie records via the Movies helper.
        return self.movies.get_movie_by_movie_ids(movie_ids)


if __name__ == '__main__':
    from movies_recommender.Recommender import test_recommendation
    from movies_recommender.RecommenderUserBased import RecommenderUserBased
    from movies_analyzer.RecommendationDataset import RecommendationDataSet
    from movies_analyzer.Movies import Movies

    movies = Movies()
    recommendation_dataset = RecommendationDataSet(movies=movies)
    recommender = RecommenderUserBased(movies)

    # Sanity check: the recommender class must live in the
    # movies_recommender package (idiomatic startswith instead of the
    # original slice-and-compare).
    assert recommender.__module__.startswith('movies_recommender.')
    test_recommendation(recommender,
                        recommendation_dataset,
                        example_items=['arek', 'mateusz'],
                        anti_test=False)
    # For interactive (IPython/Jupyter) testing only.  NOTE: this note was
    # originally an UNTERMINATED triple-quoted string, which swallowed the
    # rest of the file and made it a syntax error — converted to comments.
    #   %load_ext autoreload
    #   %autoreload 2
    #
    #   from filmweb_integrator.fwimdbmerge.filmweb import Filmweb
    #   from filmweb_integrator.fwimdbmerge.merger import Merger, get_json_df
# Demo pipeline: load the example Filmweb ratings, convert between the JSON
# and CSV representations, then merge them with the IMDb/MovieLens data.
# NOTE(review): this fragment relies on names defined outside the visible
# window (EXAMPLE_USE_JSON, logger, pd, read_file, DATA_STATIC and the
# FILMWEB_EXAMPLE_* path constants) — confirm against the full script.
if EXAMPLE_USE_JSON:
    logger.warning(f"Load json file:  {DATA_STATIC}/{FILMWEB_EXAMPLE_CSV}")
    df = get_json_df(read_file(FILMWEB_EXAMPLE_JSON))

    # Persist the JSON-sourced ratings as CSV for the next run.
    logger.warning(f"Write to csv ({len(df)})")
    df.to_csv(FILMWEB_EXAMPLE_CSV)
else:
    logger.warning(f"Load csv file:  {DATA_STATIC}/{FILMWEB_EXAMPLE_CSV}")
    df = pd.read_csv(FILMWEB_EXAMPLE_CSV)

    # Persist the CSV-sourced ratings as a JSON records file.
    logger.warning(f"Write to json ({len(df)})")
    with open(FILMWEB_EXAMPLE_JSON, "w", encoding="utf-8") as file_wr:
        df.to_json(file_wr, orient='records')

# Merge the Filmweb ratings with the IMDb catalogue and save both sides.
logger.warning("Start merging")
movies = Movies()
merger = Merger(imdb=movies.imdb)
filmweb_df, merge_df = merger.get_data(df)

logger.warning(
    f"Save merge.csv Filmweb({len(filmweb_df)}) -> IMDB({len(merge_df)})")
filmweb_df.to_csv(FILMWEB_EXAMPLE_FILMWEB, index=False)
merge_df.to_csv(FILMWEB_EXAMPLE_MERGE, index=True)

# Extend the IMDb merge with MovieLens data and persist the result.
movielens_df = movies.merge_imdb_movielens(merge_df)
logger.warning(
    f"Save movielens merge IMDB({len(merge_df)}) -> MOVIELENS({len(movielens_df)})"
)
movielens_df.to_csv(FILMWEB_EXAMPLE_MOVIELENS, index=True)

logger.warning(f"Print data ({len(movielens_df)})")