def get_meta(title, m_id):
    """Collect display metadata for a single movie.

    Args:
        title: Movie title. Matched exactly against ``original_title`` in the
            movies table and against ``title`` in the credits CSV.
        m_id: TMDB id used to look up the IMDb id. May be ``None`` (or any
            value not convertible to ``int``); an IMDb search link is then
            returned instead of a direct title link.

    Returns:
        A 4-tuple ``(df_title, cast, crew, imdb_link)`` where ``df_title`` is
        the first matching movie row restricted to the display attributes,
        ``cast`` is a list of up to five cast names, ``crew`` is the list of
        names whose job is ``"Director"``, and ``imdb_link`` is a URL string.

    Raises:
        IndexError: If ``title`` has no match in the movies table or in the
            credits CSV.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote_plus

    rec = Recommendation()
    rec.filter_genres()
    rec.filter_productions()
    df_movies = rec.md
    df_credits = pd.read_csv(PATH_CREDITS)
    df_imdb_link = pd.read_csv(PATH_MOVIELENS_TO_TMDB)

    attributes = [
        "id",
        "original_title",
        "genres",
        "homepage",
        "overview",
        "release_date",
        "production_companies",
        "runtime",
        "tagline",
        "vote_average",
        "vote_count",
    ]

    # Filter with the boolean mask and take the first hit *positionally*.
    # Feeding an index label to ``iloc`` is only correct when the frame still
    # has a default 0..n-1 index, which is not guaranteed after filtering.
    movie_row = df_movies.loc[df_movies["original_title"] == title].iloc[0]
    df_title = movie_row[attributes]

    credits_row = df_credits.loc[df_credits["title"] == title].iloc[0]
    df_crew = credits_row[["cast", "crew"]]

    # The cast/crew cells hold stringified lists of dicts.
    cast = [member["name"] for member in literal_eval(df_crew["cast"])[0:5]]
    crew = [
        member["name"]
        for member in literal_eval(df_crew["crew"])
        if member["job"] in ["Director"]
    ]

    try:
        imdb_ids = df_imdb_link.loc[
            df_imdb_link["tmdbId"] == int(m_id), "imdbId"
        ]
        # ``int`` normalises both integer and float-typed ids (and rejects
        # NaN), so the id never carries a trailing ".0".
        imdb_id = str(int(imdb_ids.iloc[0]))
        imdb_link = (
            "https://www.imdb.com/title/tt" + imdb_id.zfill(IMDB_ID_LEN)
        )
    except (TypeError, ValueError, OverflowError, IndexError, KeyError):
        # No usable TMDB id or no mapping row: fall back to a title search.
        # The title is URL-encoded so characters such as "&" or "#" do not
        # corrupt the query string.
        imdb_link = (
            "https://www.imdb.com/search/title/?title=" + quote_plus(title)
        )

    return df_title, cast, crew, imdb_link
def home():
    """Render the home page, a recommendation page, or a genre listing.

    Behaviour depends on the query string:

    * ``recommend=<title>`` - content-based recommendations for the title.
      Any failure while building the page is logged and answered with 404.
    * ``genres=<genre>[&offset=<n>]`` - top movies for the genre (``"All"``
      means no genre filter), paginated by ``offset``. A missing or
      non-integer ``offset`` is treated as ``0``.
    * neither - the plain home page.

    Returns:
        The rendered template for the selected page.
    """
    # Local import keeps this block self-contained.
    import logging

    if "recommend" in request.args:
        title = request.args["recommend"]
        try:
            rec = ContentBased()
            df = rec.recommend(title,
                               DEFAULT_LIMIT,
                               full_search=True,
                               keywords_and_desc=False,
                               critics=False)
            poster_paths = get_poster_paths(df["id"].tolist(),
                                            df["original_title"].tolist())

            # ``changed_title`` differing from the query (and being
            # non-empty) means the recommender substituted another title.
            did_you_mean = rec.changed_title not in (title, "")
            if not did_you_mean:
                rec.changed_title = title

            rec_title_meta = get_meta(rec.changed_title, None)
            rec_id = rec_title_meta[0]["id"]

            return render_template('recommendations.html',
                                   titles=df["original_title"].tolist(),
                                   images=poster_paths,
                                   votes=df["vote_average"].tolist(),
                                   m_id=df["id"].tolist(),
                                   rec_title=rec.changed_title,
                                   rec_id=rec_id,
                                   did_you_mean=did_you_mean)
        except Exception:
            # Still answer 404 as before, but keep the traceback so real
            # bugs are not silently hidden behind "not found".
            logging.getLogger(__name__).exception(
                "Could not build recommendations for %r", title)
            abort(404)

    elif "genres" in request.args:
        genre = request.args["genres"]
        if genre == "All":
            genre = None

        # ``get(..., type=int)`` falls back to the default when the value is
        # missing or not an integer, instead of failing the request.
        offset = request.args.get("offset", default=0, type=int)

        gen_rec = Recommendation()
        gen_rec.filter_genres()
        df = gen_rec.top_movies(gen_rec.md,
                                percentile=0.85,
                                limit=DEFAULT_LIMIT,
                                offset=offset,
                                genre=genre)
        poster_paths = get_poster_paths(df["id"].tolist(),
                                        df["original_title"].tolist())

        return render_template('recommendations.html',
                               titles=df["original_title"].tolist(),
                               images=poster_paths,
                               votes=df["vote_average"].tolist(),
                               m_id=df["id"].tolist(),
                               rec_title=request.args["genres"],
                               offset=offset,
                               next_offset=offset + DEFAULT_LIMIT,
                               prev_offset=offset - DEFAULT_LIMIT,
                               rec_id=None,
                               did_you_mean=None)

    else:
        return render_template('homepage.html')
def recommend(self,
              title,
              limit,
              critics=False,
              full_search=False,
              use_pickle=True,
              keywords_and_desc=False):
    """Recommend movies similar to ``title``.

    Args:
        title: Movie title (as in the TMDB dataset).
        limit: Number of movies to return.
        critics: If True, the nearest candidates are passed through
            ``Recommendation.top_movies`` (percentile 0.50) instead of being
            returned in raw similarity order. Default False.
        full_search: If True, match on the keyword metadata frame built by
            ``make_keywords``; if False, match on the description frame built
            by ``make_desc``. Ignored when ``keywords_and_desc`` is True.
            Default False.
        use_pickle: If True, reuse a pickled metadata frame when the file
            exists; otherwise rebuild the frame and overwrite the pickle.
            Default True.
        keywords_and_desc: If True, add the keyword and description matrices
            together and rank on the sum. Default False.

    Returns:
        With ``critics=False``: a pandas DataFrame with the columns
        ``original_title``, ``id``, ``vote_average``, ``vote_count``,
        ``popularity`` and ``release_date``. With ``critics=True``: whatever
        ``Recommendation.top_movies`` returns for the candidate rows.
    """
    rec = Recommendation()
    rec.filter_genres()
    title_index = self.verify_title(rec.md, title)

    def load_or_build(pickle_path, builder):
        # Reuse the cached frame when allowed and present; otherwise build
        # it from the movie table and refresh the cache.
        if use_pickle and isfile(pickle_path):
            return pd.read_pickle(pickle_path)
        frame = builder(rec.md)
        frame.to_pickle(pickle_path)
        return frame

    if keywords_and_desc:
        # Fall back to building each frame when its pickle is missing or
        # ``use_pickle`` is False, as the single-source branches below do.
        # Without this the locals were left unbound and the call crashed.
        df = load_or_build(PATH_PICKLE_KEYWORDS, self.make_keywords)
        df_desc = load_or_build(PATH_PICKLE_DESC, self.make_desc)
        rec_matrix = self.countvectorize(df) + self.tfidf(df_desc)
    elif full_search:
        df = load_or_build(PATH_PICKLE_KEYWORDS, self.make_keywords)
        rec_matrix = self.countvectorize(df)
    else:
        df = load_or_build(PATH_PICKLE_DESC, self.make_desc)
        rec_matrix = self.tfidf(df)

    # NOTE(review): presumably rec_matrix is a pairwise similarity matrix
    # whose rows line up with ``df`` - confirm against countvectorize/tfidf.
    rec_movie = rec_matrix[title_index]

    # Highest scores first; position 0 is skipped (assumed to be the queried
    # movie itself), keeping the next SCAN_SIZE candidates.
    ids = rec_movie.argsort()[::-1][1:SCAN_SIZE + 1]

    if critics:
        return rec.top_movies(df.iloc[ids],
                              percentile=0.50,
                              limit=limit,
                              offset=0)

    return df.iloc[ids[:limit]][[
        "original_title",
        "id",
        "vote_average",
        "vote_count",
        "popularity",
        "release_date",
    ]]