예제 #1
0
        # return top-n movies
        movie_ids = [
            full_dataset.to_raw_iid(i)
            for i in heapq.nlargest(k, candidates, key=candidates.get)
        ]

        return self.movies.get_movie_by_movie_ids(movie_ids)


if __name__ == '__main__':
    from movies_recommender.Recommender import test_recommendation
    from movies_recommender.RecommenderUserBased import RecommenderUserBased
    from movies_analyzer.RecommendationDataset import RecommendationDataSet
    from movies_analyzer.Movies import Movies

    # Build the movie catalogue, its rating dataset wrapper, and the
    # user-based recommender under test.
    movies = Movies()
    recommendation_dataset = RecommendationDataSet(movies=movies)
    recommender = RecommenderUserBased(movies)

    # Sanity check: the recommender must live inside the expected package.
    assert recommender.__module__.startswith('movies_recommender.')

    # Run the shared smoke test for two example users; anti_test=False
    # keeps the evaluation on the regular (non-anti) test set.
    test_recommendation(recommender,
                        recommendation_dataset,
                        example_items=['arek', 'mateusz'],
                        anti_test=False)
    """ For test only
    %load_ext autoreload
    %autoreload 2
    
    from filmweb_integrator.fwimdbmerge.filmweb import Filmweb
    from filmweb_integrator.fwimdbmerge.merger import Merger, get_json_df
예제 #2
0
from tqdm import tqdm
from movies_analyzer.RecommendationDataset import RecommendationDataSet
from movies_analyzer.Movies import Movies
from movies_analyzer.Imdb import get_imdb_movie

# Download all movie records/images from IMDb based on the MovieLens
# catalogue. This is a very long job: for ~9,000 movies it could take
# about 6 hours; for all of IMDb (~500,000 titles) about 2 weeks.
recommendation_dataset = RecommendationDataSet(movies=Movies())

# Iterate over every movie id in the dataset index and fetch its IMDb entry.
for imdb_id in tqdm(recommendation_dataset.movies.data.index):
    # Index entries look like 'tt0111161'; get_imdb_movie expects the
    # numeric part only. Strip the 'tt' prefix explicitly — the previous
    # .replace('tt', '') would also delete any 'tt' occurring later in
    # the string, corrupting such ids.
    numeric_id = imdb_id[2:] if imdb_id.startswith('tt') else imdb_id
    get_imdb_movie(numeric_id)