class CoactorCoactorMatrix(object):
    """
    Class to compute the Coactor Matrix which represents the number of movies
    each pair of actors have acted in, together
    """

    def __init__(self):
        """
        Resolves the data set location relative to this file and creates the
        data extractor used to read the movie-actor data
        """
        self.conf = ParseConfig()
        self.data_set_loc = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            self.conf.config_section_mapper("filePath").get("data_set_loc"))
        self.data_extractor = DataExtractor(self.data_set_loc)

    def fetchCoactorCoactorSimilarityMatrix(self):
        """
        Creates the coactor matrix with all the actors in a given set and
        saves it to "coactor_coactor_matrix.csv" in the working directory
        :return: tuple of (coactor matrix as a list of rows, array of the
                 actor ids in the same order as the matrix rows and columns)
        """
        movie_actor_df = self.data_extractor.get_movie_actor_data()
        movie_actor_set_df = movie_actor_df.groupby(
            ['actorid'])["movieid"].apply(set).reset_index()

        # Size the matrix from the grouped frame so that it always lines up
        # with the actor ids returned below (groupby drops missing actor ids,
        # whereas unique() on the raw column would count them).
        movie_sets = list(movie_actor_set_df.movieid)
        num_of_actors = len(movie_sets)
        coactor_matrix = [[0] * num_of_actors for _ in range(num_of_actors)]

        # The matrix is symmetric, so each pair is intersected only once and
        # mirrored; the diagonal is intentionally left at 0.
        for row, row_movies in enumerate(movie_sets):
            for col in range(row + 1, num_of_actors):
                shared_movies = len(row_movies.intersection(movie_sets[col]))
                coactor_matrix[row][col] = shared_movies
                coactor_matrix[col][row] = shared_movies

        numpy.savetxt("coactor_coactor_matrix.csv", coactor_matrix,
                      delimiter=",")
        return coactor_matrix, movie_actor_set_df.actorid.unique()
class LdaGenreActor(GenreTag):
    """
    Class to run LDA on the actors of the movies of a genre
    """

    def __init__(self):
        """
        Initializing the data extractor object to get data from the csv files
        """
        super().__init__()
        self.data_set_loc = conf.config_section_mapper("filePath").get("data_set_loc")
        self.data_extractor = DataExtractor(self.data_set_loc)

    def get_lda_data(self, genre):
        """
        Does LDA on movie-actor counts and outputs movies in terms of latent
        semantics as U and actor in terms of latent semantics as Vh. The
        per-movie actor lists are also written to "movie_actor_lda.csv" and
        every latent semantic in Vh is printed.
        :param genre: genre whose movies are used as documents
        :return: returns U and Vh
        """
        # Getting movie_genre_data
        movie_genre_data_frame = self.data_extractor.get_mlmovies_data()
        movie_genre_data_frame = self.split_genres(movie_genre_data_frame)

        # Getting actor_movie_data
        movie_actor_data_frame = self.data_extractor.get_movie_actor_data()

        genre_actor_frame = movie_genre_data_frame.merge(
            movie_actor_data_frame, how="left",
            left_on="movieid", right_on="movieid")
        # Explicit copy so the column added below never writes into a slice
        # of the merged frame.
        genre_actor_frame = genre_actor_frame[
            ["movieid", "year", "genre", "actorid", "actor_movie_rank"]].copy()
        genre_actor_frame["actorid_string"] = pd.Series(
            [str(actorid) for actorid in genre_actor_frame.actorid],
            index=genre_actor_frame.index)

        genre_data_frame = genre_actor_frame[genre_actor_frame["genre"] == genre]
        actor_df = genre_data_frame.groupby(
            ['movieid'])['actorid_string'].apply(list).reset_index()
        actor_df = actor_df.sort_values('movieid')
        actor_df.to_csv('movie_actor_lda.csv', index=True, encoding='utf-8')

        # One document per movie: the list of its actor ids as strings
        actor_documents = list(actor_df.iloc[:, 1])
        (U, Vh) = util.LDA(actor_documents, num_topics=4, num_features=1000)

        for latent in Vh:
            print ("\n")
            print(latent)

        # The docstring always promised U and Vh but nothing was returned.
        return (U, Vh)
class SvdGenreActor(GenreTag):
    """
    Class to relate Genre and Actor, inherits GenreTag and defines the
    weighing functions used to build the movie-actor matrix of a genre
    """

    def __init__(self):
        """
        Initializing the data extractor object to get data from the csv files
        """
        self.data_set_loc = conf.config_section_mapper("filePath").get("data_set_loc")
        self.data_extractor = DataExtractor(self.data_set_loc)

    def split_genres(self, data_frame):
        """
        This function extracts genres from each row and converts them into
        independent rows
        :param data_frame: frame with a '|' separated "genres" column
        :return: data frame with multiple genres split into different rows
        """
        genre_data_frame = data_frame['genres'].str.split('|', expand=True).stack()
        genre_data_frame.name = "genre"
        # Drop the inner stack level so every genre keeps its source row index
        genre_data_frame.index = genre_data_frame.index.droplevel(-1)
        genre_data_frame = genre_data_frame.reset_index()
        data_frame = data_frame.drop("genres", axis=1)
        data_frame = data_frame.reset_index()
        data_frame = genre_data_frame.merge(data_frame, how="left", on="index")
        return data_frame

    def assign_rank_weight(self, data_frame):
        """
        This function assigns a value for all the actors in a movie on a
        scale of 100, based on their rank in the movie.
        :param data_frame: frame with "movieid" and "actor_movie_rank" columns
        :return: dictionary of (movieid, actor_rank) to the computed rank_weight
        """
        movie_rank_weight_dict = {}
        for movieid, info_df in data_frame.groupby("movieid"):
            max_rank = info_df.actor_movie_rank.max()
            for rank in info_df.actor_movie_rank.unique():
                movie_rank_weight_dict[(movieid, rank)] = \
                    (max_rank - rank + 1) / max_rank * 100
        return movie_rank_weight_dict

    def assign_idf_weight(self, data_frame, unique_actors):
        """
        This function computes the idf weight (natural log) for all actors in
        a data frame, considering each movie as a document
        :param data_frame: frame whose "actorid_string" column holds the comma
                           separated actor ids of one movie per row
        :param unique_actors: all actor id strings to compute a weight for
        :return: dictionary of actor id strings and idf weights
        """
        idf_counter = {actorid_string: 0 for actorid_string in unique_actors}
        # The input frame is only read, never modified, so the caller's data
        # is left untouched.
        for joined_actors in data_frame.actorid_string:
            for actorid_string in set(joined_actors.split(',')):
                idf_counter[actorid_string] += 1
        document_count = len(data_frame.index)
        return {actorid_string: math.log(document_count / count)
                for actorid_string, count in idf_counter.items()}

    def assign_tf_weight(self, actor_series):
        """
        This function computes the tf weight for all actors of a movie
        :param actor_series: iterable of actor id strings of one movie
        :return: dictionary of actor id strings and tf weights
        """
        counter = Counter(actor_series)
        total = sum(counter.values())
        return {actorid_string: count / total
                for actorid_string, count in counter.items()}

    def get_model_weight(self, tf_weight_dict, idf_weight_dict, rank_weight_dict, actor_df, model):
        """
        This function combines the year weight, the tf weight scaled by 100
        (multiplied by the idf weight unless the model is "TF") and the rank
        weight into a "value" column.
        :param tf_weight_dict, idf_weight_dict, rank_weight_dict, actor_df, model
        :return: data_frame with column of the combined weight
        """
        use_idf = model != "TF"
        values = []
        for ts_weight, actorid_string, movieid, rank in zip(
                actor_df.year_weight, actor_df.actorid_string,
                actor_df.movieid, actor_df.actor_movie_rank):
            # Default to an empty dict: a movie without tf data contributes a
            # tf weight of 0 instead of failing on `0.get(...)`.
            term_weight = tf_weight_dict.get(movieid, {}).get(actorid_string, 0)
            if use_idf:
                term_weight = term_weight * idf_weight_dict.get(actorid_string, 0)
            values.append(ts_weight + (term_weight * 100)
                          + rank_weight_dict.get((movieid, rank), 0))
        actor_df["value"] = pd.Series(values, index=actor_df.index)
        return actor_df

    def combine_computed_weights(self, data_frame, rank_weight_dict, model, genre):
        """
        Triggers the weighing process for the rows of the given genre and sums
        up all the calculated weights for each actor
        :param data_frame: frame produced by get_genre_actor_data_frame
        :param rank_weight_dict: dictionary of (movieid, rank) to rank weight
        :param model: "TF" or any other value for tf-idf
        :param genre: genre whose rows are weighted
        :return: data frame with one row per actor, sorted by descending
                 "total" weight
        """
        # Copy the genre rows so the "value"/"total" columns are added to an
        # independent frame rather than to a slice of the caller's frame.
        genre_df = data_frame[data_frame["genre"] == genre].copy()

        # tf is computed per movie of the genre
        tf_weight_dict = {
            movieid: self.assign_tf_weight(actor_ids)
            for movieid, actor_ids in genre_df.groupby('movieid')['actorid_string']
        }

        # idf is computed over the movies of every genre
        idf_weight_dict = {}
        if model != 'TF':
            unique_actors = data_frame.actorid_string.unique()
            idf_data = data_frame.groupby(['movieid'])['actorid_string'].apply(
                lambda x: ','.join(x)).reset_index()
            idf_weight_dict = self.assign_idf_weight(idf_data, unique_actors)

        actor_df = self.get_model_weight(tf_weight_dict, idf_weight_dict,
                                         rank_weight_dict, genre_df, model)
        actor_df["total"] = actor_df.groupby(
            ['actorid_string'])['value'].transform('sum')
        actor_df = actor_df.drop_duplicates("actorid_string").sort_values(
            "total", ascending=False)
        return actor_df

    def get_genre_actor_data_frame(self):
        """
        Function to merge multiple tables and get the required dataframe for
        tf-idf calculation
        :return: dataframe with movieid, year, genre, actorid,
                 actor_movie_rank, year_weight and actorid_string columns
        """
        # Getting movie_genre_data
        movie_genre_data_frame = self.data_extractor.get_mlmovies_data()
        movie_genre_data_frame = self.split_genres(movie_genre_data_frame)

        # Getting actor_movie_data
        movie_actor_data_frame = self.data_extractor.get_movie_actor_data()

        genre_actor_frame = movie_genre_data_frame.merge(
            movie_actor_data_frame, how="left",
            left_on="movieid", right_on="movieid")
        genre_actor_frame = genre_actor_frame[
            ["movieid", "year", "genre", "actorid", "actor_movie_rank"]]

        # The index must be rebuilt after sorting: the year weight below is
        # derived from the index, so without this it would follow the
        # pre-sort row order instead of the year order.
        genre_actor_frame = genre_actor_frame.sort_values(
            "year", ascending=True).reset_index(drop=True)
        data_frame_len = len(genre_actor_frame.index)
        genre_actor_frame["year_weight"] = pd.Series(
            [(position + 1) / data_frame_len * 10
             for position in genre_actor_frame.index],
            index=genre_actor_frame.index)
        genre_actor_frame["actorid_string"] = pd.Series(
            [str(actorid) for actorid in genre_actor_frame.actorid],
            index=genre_actor_frame.index)
        return genre_actor_frame

    def svd_genre_actor(self, genre):
        """
        Does SVD on movie-actor matrix and outputs movies in terms of latent
        semantics as U and actors in terms of latent semantics as Vh. The
        matrix and both factor frames are also written to csv files and the
        first 4 latent semantics are printed.
        :param genre: genre whose movies form the rows of the matrix
        :return: returns (U frame, Vh frame, s)
        """
        genre_actor_frame = self.get_genre_actor_data_frame()
        rank_weight_dict = self.assign_rank_weight(
            genre_actor_frame[['movieid', 'actor_movie_rank']])
        genre_actor_frame = self.combine_computed_weights(
            genre_actor_frame, rank_weight_dict, "TFIDF", genre)

        temp_df = genre_actor_frame[
            ["movieid", "actorid_string", "total"]].drop_duplicates()
        genre_actor_tfidf_df = temp_df.pivot(
            index='movieid', columns='actorid_string', values='total')
        genre_actor_tfidf_df = genre_actor_tfidf_df.fillna(0)
        genre_actor_tfidf_df.to_csv('genre_actor_matrix.csv', index=True,
                                    encoding='utf-8')

        # Headers are taken from the frame in memory; reading the csv that
        # was just written back in gives the same labels.
        matrix = genre_actor_tfidf_df.values[:, :]
        row_headers = list(genre_actor_tfidf_df.index)
        column_headers = list(genre_actor_tfidf_df.columns)
        column_headers_names = [util.get_actor_name_for_id(int(col_head))
                                for col_head in column_headers]

        (U, s, Vh) = util.SVD(matrix)

        # To print latent semantics
        latents = util.get_latent_semantics(4, Vh)
        util.print_latent_semantics(latents, column_headers_names)

        u_frame = pd.DataFrame(U[:, :4], index=row_headers)
        v_frame = pd.DataFrame(Vh[:4, :], columns=column_headers)
        u_frame.to_csv('u_1b_svd.csv', index=True, encoding='utf-8')
        v_frame.to_csv('vh_1b_svd.csv', index=True, encoding='utf-8')
        return (u_frame, v_frame, s)
class SimilarActorsFromDiffMovies(ActorActorMatrix):
    """
    Class to find actors of the movies most similar (in tag space) to a
    given movie
    """

    def __init__(self):
        """
        Initializing the data extractor object to get data from the csv files
        """
        super().__init__()
        self.data_set_loc = conf.config_section_mapper("filePath").get("data_set_loc")
        self.data_extractor = DataExtractor(self.data_set_loc)
        # NOTE(review): this uses the module-level actor_actor_matrix_obj
        # rather than self, although this class inherits ActorActorMatrix -
        # confirm that is intended.
        actor_actor_matrix_obj.fetchActorActorSimilarityMatrix()

    def get_actors_of_movie(self, moviename):
        """
        Function to return the actors of a given movie
        :param moviename:
        :return: list(actorids)
        """
        actor_movie_table = self.data_extractor.get_movie_actor_data()
        movieid = util.get_movie_id(moviename)
        rows_of_movie = actor_movie_table[actor_movie_table['movieid'] == movieid]
        return rows_of_movie["actorid"].tolist()

    def get_movie_tag_matrix(self):
        """
        Function to get movie_tag matrix containing the tf-idf weight of each
        tag in each movie; the matrix is also saved to "movie_tag_matrix1d.csv"
        :return: movie_tag_matrix
        """
        data_frame = genre_tag.get_genre_data()
        tag_df = data_frame.reset_index()
        unique_tags = tag_df.tag.unique()
        idf_data = tag_df.groupby(['movieid'])['tag'].apply(set)
        tf_df = tag_df.groupby(['movieid'])['tag'].apply(
            lambda x: ','.join(x)).reset_index()
        movie_tag_dict = dict(zip(tf_df.movieid, tf_df.tag))
        tf_weight_dict = {movie: genre_tag.assign_tf_weight(tags.split(','))
                          for movie, tags in list(movie_tag_dict.items())}
        idf_weight_dict = genre_tag.assign_idf_weight(idf_data, unique_tags)
        tag_df = genre_tag.get_model_weight(tf_weight_dict, idf_weight_dict,
                                            tag_df, 'tfidf')
        tag_df["total"] = tag_df.groupby(
            ['movieid', 'tag'])['value'].transform('sum')
        temp_df = tag_df[["moviename", "tag", "total"]].drop_duplicates().reset_index()
        genre_tag_tfidf_df = temp_df.pivot_table('total', 'moviename', 'tag')
        genre_tag_tfidf_df = genre_tag_tfidf_df.fillna(0)
        genre_tag_tfidf_df.to_csv('movie_tag_matrix1d.csv', index=True,
                                  encoding='utf-8')
        return genre_tag_tfidf_df

    def get_movie_movie_vector(self, moviename):
        """
        Function which finds movie_movie_similarity_matrix in space of tags
        using tf-idf
        :param moviename:
        :return: list of (movie, similarity) pairs for every other movie,
                 sorted by descending similarity, or None when the movie is
                 not part of the movie-tag matrix
        """
        movie_tag_frame = self.get_movie_tag_matrix()
        movie_tag_matrix = movie_tag_frame.values
        movies = list(movie_tag_frame.index.values)
        movie_movie_matrix = numpy.dot(movie_tag_matrix,
                                       movie_tag_matrix.transpose())

        try:
            index_movie = movies.index(moviename)
        except ValueError:
            print ("Movie Id not found.")
            return None

        movie_row = movie_movie_matrix[index_movie].tolist()
        movie_movie_dict = dict(zip(movies, movie_row))
        # A movie is trivially similar to itself, so it is removed
        del movie_movie_dict[moviename]
        return sorted(movie_movie_dict.items(), key=operator.itemgetter(1),
                      reverse=True)

    def most_similar_actors(self, moviename):
        """
        Function to find similar actors from similar movies. Actors are
        collected from the most similar movies until at least 10 have been
        gathered; actors of the given movie are excluded and every actor is
        reported once.
        :param moviename:
        :return: list of actor names, or None when the movie is not found
        """
        movie_movie_dict = self.get_movie_movie_vector(moviename)
        if movie_movie_dict is None:
            return None

        actors = []
        for (movie, val) in movie_movie_dict:
            # The list is sorted, so nothing after the first non-positive
            # similarity is related to the given movie.
            if val <= 0:
                break
            actors = actors + self.get_actors_of_movie(movie)
            if len(actors) >= 10:
                break

        actors_of_given_movie = set(self.get_actors_of_movie(moviename))
        already_listed = set()
        actornames = []
        for actorid in actors:
            # Skip the given movie's own cast and actors already reported
            # through another similar movie.
            if actorid in actors_of_given_movie or actorid in already_listed:
                continue
            already_listed.add(actorid)
            actornames.append(util.get_actor_name_for_id(actorid))
        return actornames
class ActorMovieYearTensor(object):
    """
    Class to build the actor-movie-year tensor, decompose it and report the
    latent semantics and groupings of its three factor matrices
    """

    def __init__(self):
        """
        Creates the data extractor, builds the tensor and runs the CP
        decomposition with rank 5
        """
        self.conf = ParseConfig()
        self.data_set_loc = self.conf.config_section_mapper("filePath").get("data_set_loc")
        self.data_extractor = DataExtractor(self.data_set_loc)
        self.ordered_years = []
        self.ordered_movie_names = []
        self.ordered_actor_names = []
        self.print_list = ["\n\nFor Years:", "\n\nFor Movies:", "\n\nFor Actors:"]
        self.util = Util()
        self.tensor = self.fetchActorMovieYearTensor()
        self.factors = self.util.CPDecomposition(self.tensor, 5)

    @staticmethod
    def _first_seen_positions(values):
        """
        Maps every distinct value to its position in first-seen order
        :param values: iterable of hashable values
        :return: dictionary of value to position
        """
        positions = {}
        for value in values:
            if value not in positions:
                positions[value] = len(positions)
        return positions

    def fetchActorMovieYearTensor(self):
        """
        Create actor movie year tensor. Also records the years, movie names
        and actor names in the order of their tensor positions.
        :return: tensor of shape (years, movies, actors) holding 1 where the
                 actor acted in the movie of that year and 0 elsewhere
        """
        movies_df = self.data_extractor.get_mlmovies_data()
        actor_df = self.data_extractor.get_movie_actor_data()
        movie_actor_df = actor_df.merge(movies_df, how="left", on="movieid")

        year_positions = self._first_seen_positions(movie_actor_df["year"])
        movie_positions = self._first_seen_positions(movie_actor_df["movieid"])
        actor_positions = self._first_seen_positions(movie_actor_df["actorid"])

        # The label lists are rebuilt (not appended to) so that calling this
        # method again cannot leave duplicated labels behind.
        self.ordered_years = list(year_positions)
        self.ordered_movie_names = [self.util.get_movie_name_for_id(movieid)
                                    for movieid in movie_positions]
        self.ordered_actor_names = [self.util.get_actor_name_for_id(actorid)
                                    for actorid in actor_positions]

        tensor = np.zeros((len(year_positions), len(movie_positions),
                           len(actor_positions)))
        for year, movieid, actorid in zip(movie_actor_df["year"],
                                          movie_actor_df["movieid"],
                                          movie_actor_df["actorid"]):
            tensor[year_positions[year],
                   movie_positions[movieid],
                   actor_positions[actorid]] = 1
        return tensor

    def print_latent_semantics(self, r):
        """
        Pretty print latent semantics
        :param r: number of latent semantics to print per factor
        """
        for i, factor in enumerate(self.factors):
            print(self.print_list[i])
            latent_semantics = self.util.get_latent_semantics(r, factor.transpose())
            self.util.print_latent_semantics(latent_semantics,
                                             self.get_factor_names(i))

    def get_factor_names(self, i):
        """
        Obtain factor names
        :param i: 0 for years, 1 for movies, 2 for actors
        :return: factor names, or None for any other value of i
        """
        names_by_factor = {
            0: self.ordered_years,
            1: self.ordered_movie_names,
            2: self.ordered_actor_names,
        }
        return names_by_factor.get(i)

    def get_partitions(self, no_of_partitions):
        """
        Partition factor matrices
        :param no_of_partitions:
        :return: list of groupings
        """
        return [self.util.partition_factor_matrix(factor, no_of_partitions,
                                                  self.get_factor_names(i))
                for i, factor in enumerate(self.factors)]

    def print_partitioned_entities(self, no_of_partitions):
        """
        Pretty print groupings
        :param no_of_partitions:
        """
        groupings_list = self.get_partitions(no_of_partitions)
        for i, groupings in enumerate(groupings_list):
            print(self.print_list[i])
            self.util.print_partitioned_entities(groupings)
class ActorTag(object):
    """
    Class to relate actors and tags.
    """

    def __init__(self):
        """
        Initializing the data extractor object to get data from the csv files
        """
        self.data_set_loc = conf.config_section_mapper("filePath").get(
            "data_set_loc")
        self.data_extractor = DataExtractor(self.data_set_loc)

    def assign_idf_weight(self, data_series, unique_tags):
        """
        This function computes the idf weight (log base 2) for all tags in a
        data series, considering each movie as a document
        :param data_series: series holding the collection of tags of one
                            movie per entry
        :param unique_tags: all tags to compute a weight for
        :return: dictionary of tags and idf weights
        """
        idf_counter = {tag: 0 for tag in unique_tags}
        for tag_list in data_series:
            for tag in tag_list:
                idf_counter[tag] += 1
        document_count = len(data_series.index)
        return {tag: math.log(document_count / count, 2)
                for tag, count in idf_counter.items()}

    def assign_tf_weight(self, tag_series):
        """
        This function computes the tf weight for all tags for a movie
        :param tag_series: iterable of the tags of one movie
        :return: dictionary of tags and tf weights
        """
        counter = Counter(tag_series)
        total = sum(counter.values())
        return {tag: count / total for tag, count in counter.items()}

    def assign_rank_weight(self, data_frame):
        """
        This function assigns a value for all the actors in a movie on a
        scale of 100, based on their rank in the movie.
        :param data_frame: frame with "movieid" and "actor_movie_rank" columns
        :return: dictionary of (movieid, actor_rank) to the computed rank_weight
        """
        movie_rank_weight_dict = {}
        for movieid, info_df in data_frame.groupby("movieid"):
            max_rank = info_df.actor_movie_rank.max()
            for rank in info_df.actor_movie_rank.unique():
                movie_rank_weight_dict[(movieid, rank)] = \
                    (max_rank - rank + 1) / max_rank * 100
        return movie_rank_weight_dict

    def get_model_weight(self, tf_weight_dict, idf_weight_dict,
                         rank_weight_dict, tag_df, model):
        """
        This function writes the combined weight of every row into a "value"
        column. For model "TF" it is tf_weight * 100 + rank_weight; for any
        other model it is timestamp_weight + tf_weight * idf_weight * 100 +
        rank_weight.
        :param tf_weight_dict, idf_weight_dict, rank_weight_dict, tag_df, model
        :return: data_frame with column of the combined weight
        """
        use_idf = model != "TF"
        values = []
        for ts_weight, tag, movieid, rank in zip(
                tag_df.timestamp_weight, tag_df.tag, tag_df.movieid,
                tag_df.actor_movie_rank):
            # Default to an empty dict: a movie without tf data contributes a
            # tf weight of 0 instead of failing on `0.get(...)`.
            tf_weight = tf_weight_dict.get(movieid, {}).get(tag, 0)
            rank_weight = rank_weight_dict.get((movieid, rank), 0)
            if use_idf:
                value = (ts_weight
                         + (tf_weight * idf_weight_dict.get(tag, 0) * 100)
                         + rank_weight)
            else:
                # NOTE(review): the TF model leaves the timestamp weight out
                # while every other model adds it - confirm this is intended.
                value = (tf_weight * 100) + rank_weight
            values.append(value)
        tag_df["value"] = pd.Series(values, index=tag_df.index)
        return tag_df

    def combine_computed_weights(self, data_frame, rank_weight_dict,
                                 idf_weight_dict, model):
        """
        Triggers the weighing process and sums up all the calculated weights
        for each tag
        :param data_frame: rows (tag, movieid, timestamp_weight,
                           actor_movie_rank) to be weighted
        :param rank_weight_dict: dictionary of (movieid, rank) to rank weight
        :param idf_weight_dict: dictionary of tags and idf weights
        :param model: "TF" or any other value for tf-idf
        :return: dictionary of tags and weights, in descending weight order
        """
        tag_df = data_frame.reset_index()
        # Term frequencies are counted on the tags themselves; joining them
        # with ',' and splitting again would break tags containing a comma.
        tf_weight_dict = {
            movie: self.assign_tf_weight(tags)
            for movie, tags in tag_df.groupby('movieid')['tag']
        }
        tag_df = self.get_model_weight(tf_weight_dict, idf_weight_dict,
                                       rank_weight_dict, tag_df, model)
        tag_df["total"] = tag_df.groupby(['tag'])['value'].transform('sum')
        tag_df = tag_df.drop_duplicates("tag").sort_values("total",
                                                           ascending=False)
        return dict(zip(tag_df.tag, tag_df.total))

    def merge_movie_actor_and_tag(self, actorid, model):
        """
        Merges data from different csv files necessary to compute the tag
        weights for an actor, assigns weights to timestamp.
        :param actorid: actor whose tags are weighted
        :param model: 'TFIDF' to include idf weights; otherwise none are
                      computed
        :return: returns a dictionary of tags and weights for the actor.
        """
        mov_act = self.data_extractor.get_movie_actor_data()
        ml_tag = self.data_extractor.get_mltags_data()
        genome_tag = self.data_extractor.get_genome_tags_data()
        actor_info = self.data_extractor.get_imdb_actor_info_data()

        actor_movie_info = mov_act.merge(actor_info, how="left",
                                         left_on="actorid", right_on="id")
        tag_data_frame = ml_tag.merge(genome_tag, how="left",
                                      left_on="tagid", right_on="tagId")
        merged_data_frame = actor_movie_info.merge(tag_data_frame, how="left",
                                                   on="movieid")
        # Rows without a timestamp are movies that were never tagged
        merged_data_frame = merged_data_frame[
            merged_data_frame['timestamp'].notnull()]
        merged_data_frame = merged_data_frame.drop(["userid"], axis=1)

        rank_weight_dict = self.assign_rank_weight(
            merged_data_frame[['movieid', 'actor_movie_rank']])

        # After sorting, the rebuilt index is the chronological position that
        # the timestamp weight (scale of 10) is derived from.
        merged_data_frame = merged_data_frame.sort_values(
            "timestamp", ascending=True).reset_index()
        data_frame_len = len(merged_data_frame.index)
        merged_data_frame["timestamp_weight"] = pd.Series(
            [(index + 1) / data_frame_len * 10
             for index in merged_data_frame.index],
            index=merged_data_frame.index)

        idf_weight_dict = {}
        if model == 'TFIDF':
            idf_weight_dict = self.assign_idf_weight(
                merged_data_frame.groupby('movieid')['tag'].apply(set),
                merged_data_frame.tag.unique())

        return self.combine_computed_weights(
            merged_data_frame[merged_data_frame['actorid'] == actorid],
            rank_weight_dict, idf_weight_dict, model)
class LdaActorTag(object):
    """
    Class to find related actors using LDA topics over the tags of the
    actors' movies
    """

    def __init__(self):
        """
        Initializing the data extractor object to get data from the csv files
        """
        super().__init__()
        self.data_set_loc = conf.config_section_mapper("filePath").get(
            "data_set_loc")
        self.data_extractor = DataExtractor(self.data_set_loc)
        self.util = Util()

    def get_related_actors_lda(self, actorid):
        """
        Function to find similarity between actors using actor-actor
        similarity vector in tag space using lda. The actor-actor matrix is
        also saved to "actor_actor_matrix_with_svd_latent_values.csv" and the
        result is printed.
        :param actorid:
        :return: list of the (at most) 10 most similar (actor name, score)
                 pairs, or None when the actor id is not found
        """
        mov_act = self.data_extractor.get_movie_actor_data()
        ml_tag = self.data_extractor.get_mltags_data()
        genome_tag = self.data_extractor.get_genome_tags_data()
        actor_info = self.data_extractor.get_imdb_actor_info_data()

        actor_movie_info = mov_act.merge(actor_info, how="left",
                                         left_on="actorid", right_on="id")
        tag_data_frame = ml_tag.merge(genome_tag, how="left",
                                      left_on="tagid", right_on="tagId")
        merged_data_frame = tag_data_frame.merge(actor_movie_info, how="left",
                                                 on="movieid")
        merged_data_frame = merged_data_frame.fillna('')

        # One document per actor: the list of tags of the actor's movies
        tag_df = merged_data_frame.groupby(
            ['actorid'])['tag'].apply(list).reset_index()
        tag_df = tag_df.sort_values('actorid')
        actorid_list = tag_df.actorid.tolist()
        tag_documents = list(tag_df.iloc[:, 1])

        (U, Vh) = self.util.LDA(tag_documents, num_topics=5,
                                num_features=100000)
        actor_topic_matrix = self.util.get_doc_topic_matrix(
            U, num_docs=len(actorid_list), num_topics=5)
        actor_actor_matrix = numpy.dot(actor_topic_matrix,
                                       actor_topic_matrix.transpose())
        # The file is still written for later use, but the matrix is used
        # straight from memory instead of being read back from disk.
        numpy.savetxt("actor_actor_matrix_with_svd_latent_values.csv",
                      actor_actor_matrix, delimiter=",")

        # NOTE(review): rows of the matrix follow actorid_list, while the row
        # lookup below uses util.get_sorted_actor_ids() - presumably both
        # hold the same ids in the same order; verify.
        actorids = self.util.get_sorted_actor_ids()
        index_actor = None
        for position, candidate in enumerate(actorids):
            if candidate == actorid:
                index_actor = position
                break
        if index_actor is None:
            print("Actor Id not found.")
            return None

        actor_names = [self.util.get_actor_name_for_id(int(actor_id))
                       for actor_id in actorids]
        actor_row = actor_actor_matrix[index_actor].tolist()
        actor_actor_dict = dict(zip(actor_names, actor_row))
        # An actor is trivially similar to themselves, so they are removed
        del actor_actor_dict[self.util.get_actor_name_for_id(int(actorid))]

        ranked_actors = sorted(actor_actor_dict.items(),
                               key=operator.itemgetter(1), reverse=True)
        print(ranked_actors[0:10])
        return ranked_actors[0:10]