class TagMovieRatingTensor(object):
    """Tag x movie x rating tensor plus the factor matrices derived from it.

    Instantiation reads the data set location from the parsed configuration,
    builds the tensor from the ``mltags`` data and hands it to
    ``Util.CPDecomposition`` (second argument 5, presumably the rank).
    """

    def __init__(self):
        self.conf = ParseConfig()
        file_path_section = self.conf.config_section_mapper("filePath")
        self.data_set_loc = file_path_section.get("data_set_loc")
        self.data_extractor = DataExtractor(self.data_set_loc)
        self.max_ratings = 5
        self.ordered_ratings = [0, 1, 2, 3, 4, 5]
        self.ordered_movie_names = []
        self.ordered_tag_names = []
        # Section headers, in the same order as the tensor modes.
        self.print_list = [
            "\n\nFor Tags:",
            "\n\nFor Movies:",
            "\n\nFor Ratings:",
        ]
        self.util = Util()
        self.tensor = self.fetchTagMovieRatingTensor()
        self.factors = self.util.CPDecomposition(self.tensor, 5)

    @staticmethod
    def _index_first_seen(values, name_for, names_out):
        """
        Number distinct values in order of first appearance
        :param values: iterable of ids (duplicates allowed)
        :param name_for: callable turning an id into a display name
        :param names_out: list extended with one name per new id
        :return: dict mapping id -> position
        """
        positions = {}
        for value in values:
            if value not in positions:
                positions[value] = len(positions)
                names_out.append(name_for(value))
        return positions

    def fetchTagMovieRatingTensor(self):
        """
        Build the tag movie rating tensor
        Also fills ``ordered_tag_names`` and ``ordered_movie_names`` so that
        their positions match the first two tensor modes.
        :return: tensor
        """
        mltags = self.data_extractor.get_mltags_data()

        tag_positions = self._index_first_seen(
            mltags["tagid"],
            self.util.get_tag_name_for_id,
            self.ordered_tag_names)
        movie_positions = self._index_first_seen(
            mltags["movieid"],
            self.util.get_movie_name_for_id,
            self.ordered_movie_names)

        shape = (len(tag_positions), len(movie_positions),
                 self.max_ratings + 1)
        tensor = np.zeros(shape)

        for _, record in mltags.iterrows():
            tag = record["tagid"]
            movie = record["movieid"]
            mean_rating = self.util.get_average_ratings_for_movie(movie)
            # Every rating level up to the truncated average is switched on.
            for level in range(int(mean_rating) + 1):
                tensor[tag_positions[tag], movie_positions[movie], level] = 1

        return tensor

    def print_latent_semantics(self, r):
        """
        Print the latent semantics of every factor matrix
        :param r: forwarded to ``util.get_latent_semantics``
        """
        for position, factor in enumerate(self.factors):
            print(self.print_list[position])
            semantics = self.util.get_latent_semantics(r, factor.transpose())
            self.util.print_latent_semantics(
                semantics, self.get_factor_names(position))

    def get_factor_names(self, i):
        """
        Look up the labels belonging to one tensor mode
        :param i: mode number (0 tags, 1 movies, 2 ratings)
        :return: list of labels, or None for any other value
        """
        labels_by_mode = (
            self.ordered_tag_names,
            self.ordered_movie_names,
            self.ordered_ratings,
        )
        for mode, labels in enumerate(labels_by_mode):
            if i == mode:
                return labels
        return None

    def get_partitions(self, no_of_partitions):
        """
        Split each factor matrix into groups
        :param no_of_partitions: forwarded to ``util.partition_factor_matrix``
        :return: list with one grouping per factor matrix
        """
        return [
            self.util.partition_factor_matrix(
                factor, no_of_partitions, self.get_factor_names(position))
            for position, factor in enumerate(self.factors)
        ]

    def print_partitioned_entities(self, no_of_partitions):
        """
        Print the groupings of every factor matrix
        :param no_of_partitions: forwarded to ``get_partitions``
        """
        all_groupings = self.get_partitions(no_of_partitions)
        for position, groupings in enumerate(all_groupings):
            print(self.print_list[position])
            self.util.print_partitioned_entities(groupings)
class UserMovieRecommendation(object):
    """Recommend unseen movies to a single user.

    Several models ("SVD", "PCA", "LDA", "TD", "PageRank") each yield a
    movie-movie similarity matrix; "Combination" merges their individual
    recommendations.
    """

    # Number of latent dimensions / topics requested from every model.
    LATENT_DIMENSIONS = 10
    # Upper bound on the number of movies returned by a recommendation.
    MAX_RECOMMENDATIONS = 5

    def __init__(self, user_id):
        self.util = Util()
        self.genre_data = self.util.genre_data
        self.user_id = user_id
        self.watched_movies = self.util.get_all_movies_for_user(self.user_id)
        # Cache consulted by get_combined_recommendation: model -> movie list.
        self.model_movies_dict = {}

    def get_movie_movie_matrix(self, model):
        """
        Finds movie_tag matrix and returns movie_movie_similarity matrix
        :param model: one of "LDA", "SVD", "PCA", "TD", "PageRank"
        :return: (movies, movie_movie_similarity matrix); row/column ``i`` of
                 the matrix belongs to ``movies[i]``
        :raises ValueError: if ``model`` is not one of the supported names
        """
        dims = self.LATENT_DIMENSIONS

        if model == "LDA":
            tag_df = self.genre_data.groupby(
                ['movieid'])['tag_string'].apply(list).reset_index()
            movies = tag_df.movieid.tolist()
            movies_tags_list = list(tag_df.tag_string)
            (U, _) = self.util.LDA(
                movies_tags_list,
                num_topics=dims,
                num_features=len(self.genre_data.tag_string.unique()))
            movie_latent_matrix = self.util.get_doc_topic_matrix(
                U, num_docs=len(movies), num_topics=dims)
        elif model == "SVD" or model == "PCA":
            movie_tag_frame = self.util.get_movie_tag_matrix()
            movie_tag_matrix = movie_tag_frame.values
            movies = list(movie_tag_frame.index.values)
            if model == "SVD":
                (U, _, _) = self.util.SVD(movie_tag_matrix)
                movie_latent_matrix = U[:, :dims]
            else:
                (U, _, _) = self.util.PCA(movie_tag_matrix)
                # The first factor is used as a tag-to-latent mapping, so the
                # movies still have to be projected through it.
                tag_latent_matrix = U[:, :dims]
                movie_latent_matrix = numpy.dot(movie_tag_matrix,
                                                tag_latent_matrix)
        elif model == "TD":
            tensor = self.fetch_movie_genre_tag_tensor()
            factors = self.util.CPDecomposition(tensor, dims)
            # Same ordering as the movie mode of the tensor (sorted ids).
            movies = self.genre_data["movieid"].unique()
            movies.sort()
            movie_latent_matrix = factors[0]
        elif model == "PageRank":
            movie_tag_frame = self.util.get_movie_tag_matrix()
            movies = list(movie_tag_frame.index.values)
            movie_latent_matrix = movie_tag_frame.values
        else:
            # Previously this fell through and crashed on None.transpose().
            raise ValueError("Unsupported model: %r" % (model,))

        movie_movie_matrix = numpy.dot(movie_latent_matrix,
                                       movie_latent_matrix.transpose())
        return movies, movie_movie_matrix

    def compute_pagerank(self):
        """
        Function to prepare data for pageRank and calling pageRank method
        The watched movies are passed as the seed set.
        :return: list of (movie,weight) tuple
        """
        (movies, movie_movie_matrix) = self.get_movie_movie_matrix("PageRank")
        return self.util.compute_pagerank(self.watched_movies,
                                          movie_movie_matrix, movies)

    def get_recommendation(self, model):
        """
        Function to recommend movies for the user based on the given model
        :param model: "PageRank", "Combination", "SVD", "PCA", "LDA" or "TD";
                      any other value yields an empty list
        :return: list of at most MAX_RECOMMENDATIONS movies that the user has
                 not watched
        :raises SystemExit: (exit status 1) if the user has no watched movies
        """
        limit = self.MAX_RECOMMENDATIONS
        recommended_movies = []

        if len(self.watched_movies) == 0:
            print("THIS USER HAS NOT WATCHED ANY MOVIE.\nAborting...")
            # Same effect as exit(1) without depending on the site builtin.
            raise SystemExit(1)

        if model == "PageRank":
            for movie_p, _ in self.compute_pagerank():
                if len(recommended_movies) == limit:
                    break
                if movie_p not in self.watched_movies:
                    recommended_movies.append(movie_p)
        elif model == "Combination":
            return self.get_combined_recommendation()
        elif model in ("SVD", "PCA", "LDA", "TD"):
            (movies, movie_movie_matrix) = self.get_movie_movie_matrix(model)
            # Similarity rows of the watched movies the model knows about.
            movie_row_dict = {
                movie: movie_movie_matrix[i]
                for i, movie in enumerate(movies)
                if movie in self.watched_movies
            }
            distribution_list = self.util.get_distribution_count(
                self.watched_movies, limit)

            for index, movie in enumerate(self.watched_movies):
                if len(recommended_movies) >= limit:
                    break
                movie_row = movie_row_dict.get(movie)
                if movie_row is None:
                    # Watched movie is unknown to this model: it has no
                    # similarity row, so it cannot contribute candidates.
                    continue
                num_of_movies_to_pick = distribution_list[index]

                labelled_movie_row = dict(zip(movies, movie_row))
                # pop(..., None) tolerates watched movies missing from
                # ``movies`` where ``del`` used to raise KeyError.
                for seen in self.watched_movies:
                    labelled_movie_row.pop(seen, None)
                for picked in recommended_movies:
                    labelled_movie_row.pop(picked, None)

                ranked = sorted(labelled_movie_row.items(),
                                key=operator.itemgetter(1),
                                reverse=True)
                for (candidate, _) in ranked[0:num_of_movies_to_pick]:
                    recommended_movies.append(candidate)
                    if len(recommended_movies) == limit:
                        break

        return recommended_movies

    def fetch_movie_genre_tag_tensor(self):
        """
        Create Movie Genre Tag tensor
        Movies and genres span the whole genre data; the tag mode only holds
        tags attached to movies the user has watched. Rows whose genre or tag
        has no index are skipped.
        :return: tensor
        """
        movie_list = self.genre_data["movieid"].unique()
        movie_list.sort()
        movie_dict = {movie: pos for pos, movie in enumerate(movie_list)}

        genre_list = self.genre_data["genre"].unique()
        genre_list.sort()
        genre_dict = {genre: pos for pos, genre in enumerate(genre_list)}

        user_df = self.genre_data[self.genre_data['movieid'].isin(
            self.watched_movies)]
        tag_list = user_df["tag_string"].unique()
        tag_list.sort()
        tag_dict = {tag: pos for pos, tag in enumerate(tag_list)}

        tensor = numpy.zeros((len(movie_list), len(genre_list),
                              len(tag_list)))
        for _, row in self.genre_data.iterrows():
            # Dict lookups replace the linear scans over the NumPy arrays.
            genre_pos = genre_dict.get(row["genre"])
            tag_pos = tag_dict.get(row["tag_string"])
            if genre_pos is None or tag_pos is None:
                continue
            tensor[movie_dict[row["movieid"]], genre_pos, tag_pos] = 1
        return tensor

    def get_combined_recommendation(self):
        """
        Function to combine recommendations from all models based on
        frequency of appearance and order
        Each appearance of a movie adds ``1 + 0.2 * (len(list) - position)``
        to its score, so earlier positions weigh more.
        :return: list of recommended movies (at most MAX_RECOMMENDATIONS)
        """
        model_list = ["SVD", "LDA", "PCA", "PageRank", "TD"]
        # Fixed order keeps evaluation and score tie-breaking reproducible;
        # a set difference would vary with string hash randomisation.
        for model in model_list:
            if model not in self.model_movies_dict:
                self.model_movies_dict[model] = self.get_recommendation(model)

        movie_scores = Counter()
        for movie_list in self.model_movies_dict.values():
            list_length = len(movie_list)
            for position, movie in enumerate(movie_list):
                movie_scores[movie] += 1 + (list_length - position) * 0.2

        ranked = sorted(movie_scores.items(),
                        key=operator.itemgetter(1),
                        reverse=True)
        return [movie for (movie, _) in ranked[0:self.MAX_RECOMMENDATIONS]]
class ActorMovieYearTensor(object):
    """Year x movie x actor tensor plus the factor matrices derived from it.

    Instantiation reads the data set location from the parsed configuration,
    builds the tensor from the movie and movie-actor data and hands it to
    ``Util.CPDecomposition`` (second argument 5, presumably the rank).
    """

    def __init__(self):
        self.conf = ParseConfig()
        file_path_section = self.conf.config_section_mapper("filePath")
        self.data_set_loc = file_path_section.get("data_set_loc")
        self.data_extractor = DataExtractor(self.data_set_loc)
        self.ordered_years = []
        self.ordered_movie_names = []
        self.ordered_actor_names = []
        # Section headers, in the same order as the tensor modes.
        self.print_list = [
            "\n\nFor Years:",
            "\n\nFor Movies:",
            "\n\nFor Actors:",
        ]
        self.util = Util()
        self.tensor = self.fetchActorMovieYearTensor()
        self.factors = self.util.CPDecomposition(self.tensor, 5)

    @staticmethod
    def _enumerate_distinct(column, label_for, labels_out):
        """
        Number distinct values in order of first appearance
        :param column: iterable of values (duplicates allowed)
        :param label_for: callable turning a value into its label
        :param labels_out: list extended with one label per new value
        :return: dict mapping value -> position
        """
        lookup = {}
        for item in column:
            if item not in lookup:
                lookup[item] = len(lookup)
                labels_out.append(label_for(item))
        return lookup

    def fetchActorMovieYearTensor(self):
        """
        Build the actor movie year tensor
        Also fills ``ordered_years``, ``ordered_movie_names`` and
        ``ordered_actor_names`` so that their positions match the tensor modes.
        :return: tensor
        """
        movies_df = self.data_extractor.get_mlmovies_data()
        actor_df = self.data_extractor.get_movie_actor_data()
        joined = actor_df.merge(movies_df, how="left", on="movieid")

        # Years are their own labels; movies and actors are looked up by id.
        year_lookup = self._enumerate_distinct(
            joined["year"], lambda year: year, self.ordered_years)
        movie_lookup = self._enumerate_distinct(
            joined["movieid"],
            self.util.get_movie_name_for_id,
            self.ordered_movie_names)
        actor_lookup = self._enumerate_distinct(
            joined["actorid"],
            self.util.get_actor_name_for_id,
            self.ordered_actor_names)

        shape = (len(year_lookup), len(movie_lookup), len(actor_lookup))
        tensor = np.zeros(shape)

        for _, record in joined.iterrows():
            year_pos = year_lookup[record["year"]]
            movie_pos = movie_lookup[record["movieid"]]
            actor_pos = actor_lookup[record["actorid"]]
            tensor[year_pos, movie_pos, actor_pos] = 1

        return tensor

    def print_latent_semantics(self, r):
        """
        Print the latent semantics of every factor matrix
        :param r: forwarded to ``util.get_latent_semantics``
        """
        for position, factor in enumerate(self.factors):
            print(self.print_list[position])
            semantics = self.util.get_latent_semantics(r, factor.transpose())
            self.util.print_latent_semantics(
                semantics, self.get_factor_names(position))

    def get_factor_names(self, i):
        """
        Look up the labels belonging to one tensor mode
        :param i: mode number (0 years, 1 movies, 2 actors)
        :return: list of labels, or None for any other value
        """
        labels_by_mode = (
            self.ordered_years,
            self.ordered_movie_names,
            self.ordered_actor_names,
        )
        for mode, labels in enumerate(labels_by_mode):
            if i == mode:
                return labels
        return None

    def get_partitions(self, no_of_partitions):
        """
        Split each factor matrix into groups
        :param no_of_partitions: forwarded to ``util.partition_factor_matrix``
        :return: list with one grouping per factor matrix
        """
        return [
            self.util.partition_factor_matrix(
                factor, no_of_partitions, self.get_factor_names(position))
            for position, factor in enumerate(self.factors)
        ]

    def print_partitioned_entities(self, no_of_partitions):
        """
        Print the groupings of every factor matrix
        :param no_of_partitions: forwarded to ``get_partitions``
        """
        all_groupings = self.get_partitions(no_of_partitions)
        for position, groupings in enumerate(all_groupings):
            print(self.print_list[position])
            self.util.print_partitioned_entities(groupings)