def PersnalizedPageRank_top5SimilarMovies1(userMovies):
    """Print movies ranked by personalized PageRank from a set of seed movies.

    A movie-movie similarity matrix is built from element 1 of the
    CPDecomposition result for the actor/movie/genre/year/rank/rating tensor
    (decomposed with the argument 5), personalized PageRank is run on it with
    ``userMovies`` as seeds, and the 15 highest-scoring rows are printed,
    skipping any whose name matches a seed movie name.

    Args:
        userMovies: iterable of seed movie ids; it is iterated more than once
            and looked up in ``DataHandler.movieid_name_map`` and
            ``DataHandler.movie_genre_map``.

    Returns:
        None. All results are written to stdout.
    """
    DataHandler.createDictionaries1()
    tensor = DataHandler.getTensor_ActorMovieGenreYearRankRating()
    factors = decompositions.CPDecomposition(tensor, 5)

    # Rows of the movie factor are labelled with the sorted movie ids.
    movie_ids = sorted(DataHandler.movie_actor_map.keys())
    latent_movies = pd.DataFrame(factors[1], index=movie_ids)
    similarity = DataHandler.movie_movie_Similarity1(latent_movies)

    id_to_name = DataHandler.movieid_name_map
    scores = ppr.personalizedPageRank(similarity, userMovies, constants.ALPHA)

    # Positional list of movie ids in the similarity matrix's row order; the
    # name frame below shares these positions as its index.
    row_ids = list(similarity.index)
    names = pd.DataFrame(pd.Series(row_ids), columns=['movies'])
    names['movies'] = names['movies'].map(
        lambda movie_id: id_to_name.get(movie_id))

    # Column 0 holds the PageRank score (assumes the PageRank result is a
    # single-column frame aligned on the same positional index -- confirm).
    ranked = pd.concat([scores, names], axis=1)
    top_rows = ranked.sort_values(by=0, ascending=False).head(15)

    seed_names = [id_to_name.get(seed) for seed in userMovies]
    print('Movies similar to the following seed movies: ' +
          str(seed_names))

    genres_by_movie = DataHandler.movie_genre_map
    seed_genres = [genres_by_movie.get(seed) for seed in userMovies]
    print('Genres for seed movies: ' + str(seed_genres))

    for position in top_rows.index:
        title = top_rows.loc[position, 'movies']
        if title in seed_names:
            # Seeds are excluded by display name, not by id.
            continue
        print(title + ' ' +
              str(top_rows.loc[position, 0]) + ' ' +
              str(genres_by_movie.get(row_ids[position])))
Beispiel #2
0
def create_CP_Tensor_pickle():
    """Return the CP decomposition of the actor-movie-genre tensor, cached on disk.

    The result is read from ``CP_Decomposition_5_dim.pickle`` under
    ``constants.DIRECTORY`` when that file can be loaded. If the file is
    missing, unreadable, truncated or not a valid pickle, the decomposition
    is recomputed (``CPDecomposition`` called with the argument 5) and the
    cache file is rewritten.

    Returns:
        Whatever ``decompositions.CPDecomposition`` returned, either freshly
        computed or as previously pickled.

    Raises:
        OSError: if the cache file cannot be written after recomputing.
    """
    cache_path = constants.DIRECTORY + "CP_Decomposition_5_dim.pickle"
    try:
        # NOTE(review): pickle.load can execute arbitrary code embedded in the
        # file; this is only safe while the cache directory is trusted.
        with open(cache_path, "rb") as cache_file:
            CP_Tensor = pickle.load(cache_file)
    except (OSError, IOError, EOFError, pickle.UnpicklingError):
        # Cache miss or damaged cache: rebuild and overwrite it. The context
        # manager guarantees the data is flushed and the handle closed.
        CP_Tensor = decompositions.CPDecomposition(
            DataHandler.getTensor_ActorMovieGenre(), 5)
        with open(cache_path, "wb") as cache_file:
            pickle.dump(CP_Tensor, cache_file)

    return CP_Tensor
Beispiel #3
0
def top5SimilarMovies1(userMovies):
    """Print movies related to the seed movies according to ``pagerank.PPR``.

    A movie-movie similarity matrix is derived from element 1 of the
    CPDecomposition result for the actor/movie/genre/year/rank/rating tensor
    (decomposed with the argument 5). ``pagerank.PPR`` is then run with
    ``userMovies`` as seeds and every returned movie that is not itself a
    seed is printed with its score.

    Args:
        userMovies: container of seed movie ids; it is iterated and also used
            for membership tests.

    Returns:
        None. All results are written to stdout.
    """
    DataHandler.createDictionaries1()
    tensor = DataHandler.getTensor_ActorMovieGenreYearRankRating()
    factors = decompositions.CPDecomposition(tensor, 5)

    movie_ids = sorted(DataHandler.movie_actor_map.keys())
    latent_movies = pd.DataFrame(factors[1], index=movie_ids)
    similarity = DataHandler.movie_movie_Similarity1(latent_movies)

    id_to_name = DataHandler.movieid_name_map
    ranking = pagerank.PPR(similarity, userMovies, constants.ALPHA)

    seed_names = [id_to_name.get(seed) for seed in userMovies]
    print('Movies similar to the following seed movies: '+str(seed_names))

    # Each ranking entry is unpacked as (column position, score); the position
    # is translated back to a movie id through the similarity matrix columns.
    for column_pos, score in ranking:
        movie_id = similarity.columns[column_pos]
        if movie_id in userMovies:
            continue
        print(id_to_name.get(movie_id)+' '+ str(score))
Beispiel #4
0
def Recommender(userId):
    """Rank movies for a user with weighted personalized PageRank.

    The user's rated movies (from ``DataHandler.userMovieRatings``) are used
    as weighted seeds on a movie-movie similarity matrix built from element 1
    of the CPDecomposition result for the actor-movie-genre tensor. The seed
    movie names are printed, and every non-seed movie is returned with its
    score, highest score first.

    Args:
        userId: identifier passed to ``DataHandler.userMovieRatings``.

    Returns:
        list of ``(movie name, score)`` tuples sorted by descending score,
        excluding the user's seed movies.
    """
    DataHandler.createDictionaries1()
    movieRatedSeed = DataHandler.userMovieRatings(userId)

    decomposed = decompositions.CPDecomposition(DataHandler.getTensor_ActorMovieGenre(), 5)
    moviesList = sorted(DataHandler.movie_actor_rank_map.keys())
    movie_movie_similarity = DataHandler.movie_movie_Similarity1(
        pd.DataFrame(decomposed[1], index=moviesList))
    prData = ppr.personalizedPageRankWeighted(movie_movie_similarity, movieRatedSeed, 0.9)

    # itertuples() yields (index, first column, ...); the index is used as a
    # position into moviesList and the first column as the score.
    rankedItems = sorted(
        ((moviesList[row[0]], row[1]) for row in prData.itertuples()),
        key=lambda item: item[1],
        reverse=True)
    movieid_name_map = DataHandler.movieid_name_map

    # Seed entries are unpacked as (movie id, second element); the ids are
    # collected once so filtering below is a constant-time set lookup
    # instead of rebuilding a list for every ranked movie.
    seedMovieIds = [movieId for movieId, _ in movieRatedSeed]
    seedMovieIdSet = set(seedMovieIds)

    seedmovieNames = [movieid_name_map[movieId] for movieId in seedMovieIds]
    print("Movies similar to the users seed movies " + str(seedmovieNames) + " are:")
    return [(movieid_name_map[movieId], score)
            for movieId, score in rankedItems
            if movieId not in seedMovieIdSet]
def task1c(userId):
    """Print a user's watched movies in order, followed by recommended movies.

    A movie-movie similarity matrix is built from element 1 of the
    CPDecomposition result for the actor-movie-genre tensor. The user's
    entries in ``DataHandler.user_rated_or_tagged_date_map`` are sorted by
    their second element (presumably a timestamp -- confirm against the
    DataHandler) and the movie names are printed in that order, together with
    the names of the movies returned by ``getWeightedSimilarityOrder``.

    Args:
        userId: key into ``DataHandler.user_rated_or_tagged_date_map``; also
            forwarded to ``getWeightedSimilarityOrder``.

    Returns:
        None. All results are written to stdout.
    """
    # Declared by the original code; ``wt`` is not assigned in this function.
    global wt
    DataHandler.createDictionaries1()
    tensor = DataHandler.getTensor_ActorMovieGenre()
    factors = decompositions.CPDecomposition(tensor, 5)
    movie_ids = sorted(DataHandler.movie_actor_rank_map.keys())
    latent_movies = pd.DataFrame(factors[1], index=movie_ids)
    similarity = DataHandler.movie_movie_Similarity1(latent_movies)

    history = DataHandler.user_rated_or_tagged_date_map.get(userId)
    chronological = sorted(history, key=lambda entry: entry[1])
    # Transpose the entries and keep the first field of each: the movie ids.
    history_columns = list(zip(*chronological))
    watched_ids = list(history_columns[0])

    recommended_ids = getWeightedSimilarityOrder(similarity, userId)

    id_to_name = DataHandler.movieid_name_map
    recommended_names = [id_to_name[movie_id] for movie_id in recommended_ids]
    watched_names = [id_to_name[movie_id] for movie_id in watched_ids]

    print('Movies Watched by the user in order: ' + str(watched_names))
    print('Top 5 movies : ' + str(recommended_names))
Beispiel #6
0
def _print_latent_semantics(factor, ids, frame, id_column, printer,
                            label_column=None):
    """Print the partition and the per-semantic listing for one CP factor.

    Args:
        factor: one factor matrix taken from the CPDecomposition result.
        ids: identifiers returned alongside the tensor for this mode; passed
            to ``printer`` and, when ``label_column`` is None, also used as
            the labels for ``get_partition_on_ids``.
        frame: DataFrame handed to ``printer`` and, when ``label_column`` is
            given, the source of the partition labels.
        id_column: column name forwarded to ``printer``.
        printer: callable invoked as ``printer(semantic, ids, frame,
            id_column)`` for every latent semantic.
        label_column: column of ``frame`` whose values label the partition,
            or None to label the partition with ``ids`` directly.
    """
    loadings = np.array(factor)
    groups = formatter.splitGroup(loadings, 5)
    labels = ids if label_column is None else frame[label_column]
    get_partition_on_ids(groups, labels)

    print("Top 5 semantics are:")
    # Transposing turns each column of the factor matrix into one row, so
    # every iteration handles the loadings of a single latent semantic.
    for semantic in np.matrix(loadings.T).tolist():
        printer(semantic, ids, frame, id_column)
        print("")


def top5LatentCP(tensorIdentifier, space):
    """Print the latent semantics of one mode of a CP-decomposed tensor.

    Args:
        tensorIdentifier: ``'AMY'`` for the actor-movie-year tensor or
            ``'TMR'`` for the tag-movie-rating tensor.
        space: mode to report. ``'Actor'``, ``'Movie'`` or ``'Year'`` for
            ``'AMY'``; ``'Tag'``, ``'Movie'`` or ``'Rating'`` for ``'TMR'``.

    Returns:
        None. Results are written to stdout; an unrecognised argument prints
        ``'Wrong Tensor Identifier'`` or ``'Wrong Space'`` instead. Note that
        the tensor is still built and decomposed before ``space`` is checked.
    """
    if tensorIdentifier == 'AMY':
        tensor, actors, movies, years = DataHandler.getTensor_ActorMovieYear()
        u = decompositions.CPDecomposition(tensor, constants.RANK)
        if space == 'Actor':
            _print_latent_semantics(u[0], actors, DataHandler.actor_info_df,
                                    'id', prettyPrintVector,
                                    label_column='name')
        elif space == 'Movie':
            _print_latent_semantics(u[1], movies, DataHandler.genre_movie_df,
                                    'movieid', prettyPrintVector,
                                    label_column='moviename')
        elif space == 'Year':
            # The partition is labelled with the year values themselves.
            _print_latent_semantics(u[2], years, DataHandler.genre_movie_df,
                                    'year', prettyPrintYearVector)
        else:
            print('Wrong Space')
    elif tensorIdentifier == 'TMR':
        tensor, tags, movies, ranks = DataHandler.getTensor_TagMovieRating()
        u = decompositions.CPDecomposition(tensor, constants.RANK)
        if space == 'Tag':
            _print_latent_semantics(u[0], tags, DataHandler.tag_id_df,
                                    'tagId', prettyPrintVector,
                                    label_column='tag')
        elif space == 'Movie':
            _print_latent_semantics(u[1], movies, DataHandler.genre_movie_df,
                                    'movieid', prettyPrintVector,
                                    label_column='moviename')
        elif space == 'Rating':
            # The partition is labelled with the rating values themselves.
            _print_latent_semantics(u[2], ranks, DataHandler.user_ratings_df,
                                    'rating', prettyPrintRankVector)
        else:
            print('Wrong Space')
    else:
        print('Wrong Tensor Identifier')