def PersnalizedPageRank_top5SimilarMovies1(userMovies):
    """Print the movies that personalized PageRank scores highest for the seeds.

    A movie-movie similarity matrix is built from factor 1 of a CP
    decomposition of the actor/movie/genre/year/rank/rating tensor, and
    ``ppr.personalizedPageRank`` is run over it with ``userMovies`` as seeds
    and ``constants.ALPHA`` as its third argument.

    The 15 highest-scored rows are considered; rows whose movie name equals
    one of the seed movie names are skipped. Each remaining row is printed as
    ``<name> <score> <genres>``.

    Args:
        userMovies: iterable of seed movie ids (looked up in
            ``DataHandler.movieid_name_map`` and ``DataHandler.movie_genre_map``).

    Returns:
        None. All output goes to stdout.
    """
    DataHandler.createDictionaries1()

    # The second argument (5) is presumably the number of CP components;
    # factor 1 is treated as the movie latent space, indexed by sorted movie id.
    factors = decompositions.CPDecomposition(
        DataHandler.getTensor_ActorMovieGenreYearRankRating(), 5)
    movie_ids = sorted(DataHandler.movie_actor_map.keys())
    latent_movies = pd.DataFrame(factors[1], index=movie_ids)
    similarity = DataHandler.movie_movie_Similarity1(latent_movies)

    id_to_name = DataHandler.movieid_name_map
    scores = ppr.personalizedPageRank(similarity, userMovies, constants.ALPHA)

    # Positional list of movie ids in the similarity matrix's row order; the
    # final loop indexes into it with the concatenated frame's row labels.
    ordered_ids = list(similarity.index)
    names = pd.DataFrame(pd.Series(ordered_ids), columns=['movies'])
    names['movies'] = names['movies'].map(id_to_name.get)

    ranked = pd.concat([scores, names], axis=1)
    top_rows = ranked.sort_values(by=0, ascending=False).head(15)

    seed_names = [id_to_name.get(movie_id) for movie_id in userMovies]
    print('Movies similar to the following seed movies: ' + str(seed_names))

    genres = DataHandler.movie_genre_map
    seed_genres = [genres.get(movie_id) for movie_id in userMovies]
    print('Genres for seed movies: ' + str(seed_genres))

    for row in top_rows.index:
        title = top_rows.loc[row, 'movies']
        if title in seed_names:
            # Seeds are filtered by name, not by id.
            continue
        print(title + ' ' + str(top_rows.loc[row, 0]) + ' '
              + str(genres.get(ordered_ids[row])))
def create_CP_Tensor_pickle():
    """Return the cached CP decomposition, computing and caching it on a miss.

    The cache lives at ``constants.DIRECTORY + "CP_Decomposition_5_dim.pickle"``.
    If that file cannot be opened or read (``OSError``/``IOError``), the
    decomposition of ``DataHandler.getTensor_ActorMovieGenre()`` is computed
    with ``decompositions.CPDecomposition(..., 5)`` and pickled to the same
    path before being returned.

    Returns:
        Whatever ``decompositions.CPDecomposition`` returned (freshly computed
        or unpickled from the cache).

    Raises:
        OSError: if the cache file cannot be written after a miss.
    """
    cache_path = constants.DIRECTORY + "CP_Decomposition_5_dim.pickle"
    CP_Tensor = None
    try:
        # NOTE: unpickling executes code embedded in the file; this is only
        # safe because the cache is expected to be written by this function.
        with open(cache_path, "rb") as cache_file:
            CP_Tensor = pickle.load(cache_file)
    except (OSError, IOError):
        # Cache miss: rebuild the decomposition and persist it for next time.
        CP_Tensor = decompositions.CPDecomposition(
            DataHandler.getTensor_ActorMovieGenre(), 5)
        # The context manager guarantees the pickle is flushed and the handle
        # closed even if dumping fails part-way.
        with open(cache_path, "wb") as cache_file:
            pickle.dump(CP_Tensor, cache_file)
    return CP_Tensor
def top5SimilarMovies1(userMovies):
    """Print movies ranked by ``pagerank.PPR`` for the given seed movies.

    A movie-movie similarity matrix is built from factor 1 of a CP
    decomposition of the actor/movie/genre/year/rank/rating tensor.
    ``pagerank.PPR`` is then called with that matrix, the seeds and
    ``constants.ALPHA``; its result is iterated as ``(column position, score)``
    pairs. Every pair whose movie id is not itself a seed is printed as
    ``<name> <score>``.

    Args:
        userMovies: container of seed movie ids.

    Returns:
        None. All output goes to stdout.
    """
    DataHandler.createDictionaries1()

    # The second argument (5) is presumably the number of CP components;
    # factor 1 is used as the movie latent space, indexed by sorted movie id.
    factors = decompositions.CPDecomposition(
        DataHandler.getTensor_ActorMovieGenreYearRankRating(), 5)
    movie_ids = sorted(DataHandler.movie_actor_map.keys())
    similarity = DataHandler.movie_movie_Similarity1(
        pd.DataFrame(factors[1], index=movie_ids))

    id_to_name = DataHandler.movieid_name_map
    ranking = pagerank.PPR(similarity, userMovies, constants.ALPHA)

    seed_names = [id_to_name.get(seed) for seed in userMovies]
    print('Movies similar to the following seed movies: ' + str(seed_names))

    for position, score in ranking:
        movie_id = similarity.columns[position]
        if movie_id in userMovies:
            # Never recommend a movie the caller supplied as a seed.
            continue
        print(id_to_name.get(movie_id) + ' ' + str(score))
def Recommender(userId):
    """Rank movies for a user with weighted personalized PageRank.

    The user's rated movies (``DataHandler.userMovieRatings(userId)``, iterated
    as ``(movie id, value)`` pairs) are used as seeds for
    ``ppr.personalizedPageRankWeighted`` over a movie-movie similarity matrix
    built from factor 1 of a CP decomposition of the actor/movie/genre tensor.
    The names of the seed movies are printed to stdout.

    Args:
        userId: identifier passed to ``DataHandler.userMovieRatings``.

    Returns:
        list of ``(movie name, score)`` tuples sorted by descending score,
        excluding the user's seed movies.

    Raises:
        KeyError: if a movie id is missing from ``DataHandler.movieid_name_map``.
    """
    DataHandler.createDictionaries1()
    movieRatedSeed = DataHandler.userMovieRatings(userId)

    # The second argument (5) is presumably the number of CP components.
    decomposed = decompositions.CPDecomposition(
        DataHandler.getTensor_ActorMovieGenre(), 5)
    moviesList = sorted(DataHandler.movie_actor_rank_map.keys())
    movie_movie_similarity = DataHandler.movie_movie_Similarity1(
        pd.DataFrame(decomposed[1], index=moviesList))

    prData = ppr.personalizedPageRankWeighted(
        movie_movie_similarity, movieRatedSeed, 0.9)

    # Each itertuples() row is (index, first column, ...); the index is used
    # as a position into moviesList and the first column as the score.
    rankedItems = sorted(
        ((moviesList[row[0]], row[1]) for row in prData.itertuples()),
        key=lambda item: item[1],
        reverse=True,
    )

    movieid_name_map = DataHandler.movieid_name_map
    seedmovieNames = [movieid_name_map[movieId] for movieId, _ in movieRatedSeed]
    print("Movies similar to the users seed movies " + str(seedmovieNames) + " are:")

    # Build the seed-id lookup once instead of once per ranked item, and keep
    # its loop variables separate from the ones used in the result below.
    seedMovieIds = {movieId for movieId, _ in movieRatedSeed}
    return [
        (movieid_name_map[movieId], score)
        for movieId, score in rankedItems
        if movieId not in seedMovieIds
    ]
def task1c(userId):
    """Print the user's watched movies in timestamp order and the recommended ones.

    The watch history comes from
    ``DataHandler.user_rated_or_tagged_date_map.get(userId)``; its entries are
    sorted by their second element and the first element of each is taken as
    the movie id. Recommendations come from ``getWeightedSimilarityOrder``
    applied to a movie-movie similarity matrix built from factor 1 of a CP
    decomposition of the actor/movie/genre tensor.

    Args:
        userId: identifier of the user to report on.

    Returns:
        None. All output goes to stdout.

    Raises:
        TypeError: if the user has no entry in the history map.
        IndexError: if the user's history is empty.
        KeyError: if a movie id is missing from ``DataHandler.movieid_name_map``.
    """
    # NOTE(review): ``wt`` is declared global but never read or assigned here.
    global wt
    DataHandler.createDictionaries1()

    # The second argument (5) is presumably the number of CP components.
    factors = decompositions.CPDecomposition(
        DataHandler.getTensor_ActorMovieGenre(), 5)
    movie_ids = sorted(DataHandler.movie_actor_rank_map.keys())
    latent_movies = pd.DataFrame(factors[1], index=movie_ids)
    similarity = DataHandler.movie_movie_Similarity1(latent_movies)

    # Order the history by each entry's second field, then keep only the
    # first field (the movie id) of every entry.
    history = sorted(
        DataHandler.user_rated_or_tagged_date_map.get(userId),
        key=itemgetter(1))
    history_columns = list(zip(*history))
    watched_ids = list(history_columns[0])

    recommended_ids = getWeightedSimilarityOrder(similarity, userId)

    id_to_name = DataHandler.movieid_name_map
    recommended_names = [id_to_name[movie_id] for movie_id in recommended_ids]
    watched_names = [id_to_name[movie_id] for movie_id in watched_ids]

    print('Movies Watched by the user in order: ' + str(watched_names))
    print('Top 5 movies : ' + str(recommended_names))
def _print_top5_semantics(factor, partition_labels, print_vector, mode_ids,
                          lookup_df, key_column):
    """Report the partitioning and the latent semantics of one CP factor.

    Args:
        factor: factor matrix of the decomposition for the chosen mode.
        partition_labels: labels handed to ``get_partition_on_ids`` together
            with the groups produced by ``formatter.splitGroup(factor, 5)``.
        print_vector: callable invoked as
            ``print_vector(semantic, mode_ids, lookup_df, key_column)`` for
            each latent semantic.
        mode_ids: identifiers of the objects along the chosen mode.
        lookup_df: frame passed through to ``print_vector``.
        key_column: column name passed through to ``print_vector``.
    """
    factor_matrix = np.array(factor)
    split_group_with_index = formatter.splitGroup(factor_matrix, 5)
    get_partition_on_ids(split_group_with_index, partition_labels)

    # One row per latent semantic. atleast_2d gives the same nested list that
    # np.matrix(...).tolist() did, without relying on the discouraged class.
    semantics = np.atleast_2d(factor_matrix.T).tolist()
    print("Top 5 semantics are:")
    for semantic in semantics:
        print_vector(semantic, mode_ids, lookup_df, key_column)
        print("")


def top5LatentCP(tensorIdentifier, space):
    """Print the latent semantics of one mode of a CP-decomposed tensor.

    Args:
        tensorIdentifier: ``'AMY'`` for the tensor returned by
            ``DataHandler.getTensor_ActorMovieYear`` or ``'TMR'`` for the one
            returned by ``DataHandler.getTensor_TagMovieRating``.
        space: mode to report. ``'Actor'``, ``'Movie'`` or ``'Year'`` for
            ``'AMY'``; ``'Tag'``, ``'Movie'`` or ``'Rating'`` for ``'TMR'``.

    Returns:
        None. Results are printed to stdout. An unknown ``space`` prints
        ``Wrong Space`` and an unknown ``tensorIdentifier`` prints
        ``Wrong Tensor Identifier``.
    """
    if tensorIdentifier == 'AMY':
        # The decomposition runs before ``space`` is validated, so an unknown
        # space still pays for it.
        tensor, actors, movies, years = DataHandler.getTensor_ActorMovieYear()
        u = decompositions.CPDecomposition(tensor, constants.RANK)
        if space == 'Actor':
            actorIdActorsDf = DataHandler.actor_info_df
            _print_top5_semantics(u[0], actorIdActorsDf['name'],
                                  prettyPrintVector, actors,
                                  actorIdActorsDf, 'id')
        elif space == 'Movie':
            movieIdMoviesDf = DataHandler.genre_movie_df
            _print_top5_semantics(u[1], movieIdMoviesDf['moviename'],
                                  prettyPrintVector, movies,
                                  movieIdMoviesDf, 'movieid')
        elif space == 'Year':
            _print_top5_semantics(u[2], years, prettyPrintYearVector, years,
                                  DataHandler.genre_movie_df, 'year')
        else:
            print('Wrong Space')
    elif tensorIdentifier == 'TMR':
        tensor, tags, movies, ranks = DataHandler.getTensor_TagMovieRating()
        u = decompositions.CPDecomposition(tensor, constants.RANK)
        if space == 'Tag':
            tagIdTagsDf = DataHandler.tag_id_df
            _print_top5_semantics(u[0], tagIdTagsDf['tag'],
                                  prettyPrintVector, tags,
                                  tagIdTagsDf, 'tagId')
        elif space == 'Movie':
            movieIdMoviesDf = DataHandler.genre_movie_df
            _print_top5_semantics(u[1], movieIdMoviesDf['moviename'],
                                  prettyPrintVector, movies,
                                  movieIdMoviesDf, 'movieid')
        elif space == 'Rating':
            _print_top5_semantics(u[2], ranks, prettyPrintRankVector, ranks,
                                  DataHandler.user_ratings_df, 'rating')
        else:
            print('Wrong Space')
    else:
        print('Wrong Tensor Identifier')