Exemple #1
0
    def __init__(self,
                 media_type='movie',
                 db_name='VionelDB',
                 collection_name='BoxerMovies',
                 hostname='192.168.1.80',
                 port=27017):
        """Pick the feature weights for ``media_type`` and open the store.

        Args:
            media_type: ``'movie'`` or ``'tv'``; selects the weight table
                stored in ``self.feature_weight_dict``.
            db_name: first argument handed to ``MongoManager``.
            collection_name: second argument handed to ``MongoManager``.
            hostname: third argument handed to ``MongoManager``.
            port: fourth argument handed to ``MongoManager``.

        Raises:
            ValueError: if ``media_type`` is neither ``'movie'`` nor ``'tv'``.
        """
        if media_type == 'movie':
            # NOTE(review): the ratio constants on imdbGenre / imdbMainactor
            # look like empirically derived normalisers; their origin is not
            # shown in this file -- confirm before changing them.
            self.feature_weight_dict = {
                'imdbDirector': 0.7,
                'imdbGenre': 0.7 * 69325.4760905 / 223399.25309 / 3,
                'imdbKeyword': 0.5,
                'wikiKeyword': 0,
                'vionelTheme': 0.5,
                'vionelScene': 0.35,
                'locationCountry': 0.3,
                'locationCity': 0.5,
                'imdbMainactor': 0.7 * 215719.641732 / 223399.25309 / 3,
                'RGB': 0.25,
                'brightness': 0.25
            }
        elif media_type == 'tv':
            self.feature_weight_dict = {
                'imdbCreator': 1,
                'imdbGenre': 1,
                'imdbMainactor': 1,
                'imdbKeyword': 10
            }
        else:
            # Without this branch an unknown type left feature_weight_dict
            # unset and only failed later with an AttributeError; fail here,
            # before a database connection is opened.
            raise ValueError(
                "unsupported media_type %r (expected 'movie' or 'tv')"
                % (media_type,))
        self.mongo_manager = MongoManager(db_name, collection_name, hostname,
                                          port)
        self.media_type = media_type
    def __init__(self, media_type='movie', 
                       db_name='VionelDB', 
                       collection_name='BoxerMovies', 
                       hostname='192.168.1.80', 
                       port=27017):
        """Choose the weight table for ``media_type`` and connect to the store.

        Args:
            media_type: ``'movie'`` or ``'tv'``; decides which weights end up
                in ``self.feature_weight_dict``.
            db_name, collection_name, hostname, port: forwarded unchanged, in
                this order, to ``MongoManager``.

        Raises:
            ValueError: if ``media_type`` is neither ``'movie'`` nor ``'tv'``.
        """
        # Reject unknown types first: previously they fell through both
        # branches, leaving feature_weight_dict undefined until some later
        # call tripped over the missing attribute.
        if media_type not in ('movie', 'tv'):
            raise ValueError(
                "unsupported media_type %r (expected 'movie' or 'tv')"
                % (media_type,))

        if media_type == 'movie':
            # NOTE(review): provenance of the ratio constants is not visible
            # here; they are kept exactly as written.
            self.feature_weight_dict = {
                'imdbDirector': 0.7,
                'imdbGenre': 0.7 * 69325.4760905 / 223399.25309 / 3,
                'imdbKeyword': 0.5,
                'wikiKeyword': 0,
                'vionelTheme': 0.5,
                'vionelScene': 0.35,
                'locationCountry': 0.3,
                'locationCity': 0.5,
                'imdbMainactor': 0.7 * 215719.641732/223399.25309 / 3,
                'RGB': 0.25,
                'brightness': 0.25
            }
        else:
            self.feature_weight_dict = {
                'imdbCreator': 1,
                'imdbGenre': 1,
                'imdbMainactor': 1,
                'imdbKeyword': 10
            }
        self.mongo_manager = MongoManager(db_name, collection_name, hostname, port)
        self.media_type = media_type
    def __init__(self):
        """Install the fixed feature weights and connect to the movie store.

        Takes no arguments: both the weight table and the ``MongoManager``
        connection arguments are hard-coded below.
        """
        # Listed as pairs so the table reads top to bottom; insertion order
        # matches the original literal.
        weight_pairs = [
            ('imdbDirector', 0.7),
            ('imdbGenre', 0.5),
            ('imdbKeyword', 0.6),
            ('wikiKeyword', 1.3),
            ('vionelTheme', 1.3),
            ('vionelScene', 0.35),
            ('locationCountry', 0.3),
            ('locationCity', 0.5),
            ('imdbMainactor', 0.9),
            ('RGB', 0.25),
            ('brightness', 0.25),
        ]
        self.feature_weight_dict = dict(weight_pairs)

        # NOTE(review): the two trailing empty strings are presumably
        # credentials -- confirm against MongoManager's signature.
        connection_args = ('VionelMovies', 'BoxerMovies', '172.17.42.1',
                           27017, '', '')
        self.mongo_manager = MongoManager(*connection_args)
class SimilarityRecommender(object):
    """Recommend movies by summing weighted per-feature similarity scores.

    For every feature in ``feature_weight_dict`` a ``Counter`` mapping
    imdb id -> weighted similarity is computed and stored on the instance as
    ``self.<feature.lower()>_movieid_sim_counter``.  ``recommend`` sums those
    counters, applies a language filter and returns the top entries together
    with the per-feature scores that produced them.
    """

    # Features scored with cosine similarity; every other feature is scored
    # by counting shared values.  The original check only listed spellings
    # ('imdbDirectors', 'imdbGenres', 'imdbMainactors', 'Brightness') that
    # never match the keys of feature_weight_dict, so those four features
    # always fell through to the counting branch.  The matching spellings
    # are added; the legacy ones are kept so nothing leaves the cosine path.
    _COSINE_FEATURES = frozenset([
        'imdbDirector', 'imdbDirectors',
        'imdbGenre', 'imdbGenres',
        'imdbMainactor', 'imdbMainactors',
        'locationCountry',
        'locationCity',
        'vionelScene',
        'RGB',
        'brightness', 'Brightness',
    ])

    def __init__(self):
        """Install the fixed feature weights and connect to the movie store."""
        self.feature_weight_dict = {
            'imdbDirector': 0.7,
            'imdbGenre': 0.5,
            'imdbKeyword': 0.6,
            'wikiKeyword': 1.3,
            'vionelTheme': 1.3,
            'vionelScene': 0.35,
            'locationCountry': 0.3,
            'locationCity': 0.5,
            'imdbMainactor': 0.9,
            'RGB': 0.25,
            'brightness': 0.25
        }
        self.mongo_manager = MongoManager('VionelMovies', 'BoxerMovies', '172.17.42.1', 27017, '', '')

    @staticmethod
    def _counter_attr_name(feature):
        """Return the instance attribute name holding ``feature``'s counter."""
        return "%s_movieid_sim_counter" % feature.lower()

    def __get_imdbid_feature_dict(self, feature_name):
        """Map imdb id -> stored value of ``feature_name``.

        Documents that do not carry ``feature_name`` are left out of the
        result.
        """
        projection = {"imdbId": 1, feature_name: 1, "_id": 0}
        result_dict = {}
        for movie in self.mongo_manager.exec_query({}, projection):
            imdbid = movie["imdbId"]
            try:
                result_dict[imdbid] = movie[feature_name]
            except KeyError:
                continue
        return result_dict

    def __get_imdbid_similarity_dict(self, movieid_list, recommended_by):
        """Score every stored movie against ``movieid_list`` on one feature.

        Args:
            movieid_list: imdb ids of the input (liked) movies.
            recommended_by: feature name to compare on.

        Returns:
            dict mapping imdb id -> unweighted similarity.  Features in
            ``_COSINE_FEATURES`` use ``calculate_cosine``; all others get
            0.1 per shared value, capped at 1.
        """
        movieid_with_featureid_dict = self.__get_imdbid_feature_dict(recommended_by)
        result_dict = {}

        if recommended_by in self._COSINE_FEATURES:
            input_feature_counts = intersection_of_values_for_certain_keys(
                movieid_list, movieid_with_featureid_dict)
            # Built once: the input side does not change inside the loop.
            input_feature_ids = set(input_feature_counts.keys())

            for movieid, features in movieid_with_featureid_dict.items():
                if not input_feature_ids.intersection(features):
                    # Nothing in common: skip the cosine computation.
                    result_dict[movieid] = 0
                    continue
                candidate_feature_counts = intersection_of_values_for_certain_keys(
                    [movieid], movieid_with_featureid_dict)
                result_dict[movieid] = calculate_cosine(
                    input_feature_counts, candidate_feature_counts)
            return result_dict

        input_movie_features = set(union_of_values_for_spec_keys(
            movieid_list, movieid_with_featureid_dict))
        coefficient = 0.1
        for movieid, features in movieid_with_featureid_dict.items():
            shared = len(input_movie_features.intersection(features))
            score = shared * coefficient
            if score > 1:
                score = 1
            result_dict[movieid] = score
        return result_dict

    def __language_filter(self, input_movieid_list, combined_movieid_sim_counter):
        """Drop candidates that share no language with the input movies.

        Candidates whose language is missing or empty are kept.  The counter
        is modified in place and also returned.
        """
        imdbid_language_dict = self.__get_imdbid_feature_dict("language")

        liked_languages = set()
        for movieid in input_movieid_list:
            liked_languages.update(imdbid_language_dict.get(movieid, ()))

        rejected = []
        for imdbid in combined_movieid_sim_counter:
            # .get(): movies without a stored language are absent from the
            # dict; indexing used to raise KeyError for them.
            language_list = imdbid_language_dict.get(imdbid)
            if not language_list:
                continue
            # No language in common: exclude this movie.
            if liked_languages.isdisjoint(language_list):
                rejected.append(imdbid)

        # Deleted after the loop so the counter is not resized mid-iteration.
        for imdbid in rejected:
            del combined_movieid_sim_counter[imdbid]

        return combined_movieid_sim_counter

    def __multiply_coefficient(self, movieid_score_counter, coefficient):
        """Return a new Counter with every score multiplied by ``coefficient``."""
        result_count = Counter()
        for movieid, score in movieid_score_counter.items():
            result_count[movieid] = score * coefficient
        return result_count

    def features_contribute_most(self, recommended_movies_dict):
        """Return, per recommended movie, its weighted score for each feature.

        Requires ``recommend_for_each_feature`` to have run first, since it
        reads the per-feature counters stored on the instance.

        Returns:
            dict mapping imdb id -> {feature name: weighted score}.
        """
        movieid_featurescore_dict = {}
        for movieid in recommended_movies_dict:
            feature_score_dict = {}
            for feature in self.feature_weight_dict:
                # getattr instead of exec: ids are data, not source code.
                counter = getattr(self, self._counter_attr_name(feature))
                feature_score_dict[feature] = counter[movieid]
            movieid_featurescore_dict[movieid] = feature_score_dict
        return movieid_featurescore_dict

    def recommend_for_each_feature(self, input_movieid_list, num_of_recommended_movies):
        """Compute and store one weighted similarity Counter per feature.

        Each counter is stored as ``self.<feature.lower()>_movieid_sim_counter``
        (for example ``self.imdbdirector_movieid_sim_counter``) with the input
        movies removed.  ``num_of_recommended_movies`` is accepted for
        interface compatibility and is not used here.
        """
        for feature in self.feature_weight_dict:
            similarity = self.__get_imdbid_similarity_dict(input_movieid_list, feature)
            weighted = self.__multiply_coefficient(
                Counter(similarity), self.feature_weight_dict[feature])

            # Counter's deletion does not raise for ids that are absent.
            for movieid in input_movieid_list:
                del weighted[movieid]

            setattr(self, self._counter_attr_name(feature), weighted)

    def recommend(self, input_movieid_list, num_of_recommended_movies):
        """Return recommended movies and the per-feature scores behind them.

        Args:
            input_movieid_list: imdb ids of the movies to recommend from.
            num_of_recommended_movies: how many top-scoring movies to return.

        Returns:
            A dict of this shape (values are illustrative)::

                {
                    "movie": {"tt0340855": 0.68, "tt1124035": 0.96},
                    "reason": {
                        "tt0340855": {"imdbDirector": 0, "RGB": 0.0625, ...},
                        "tt1124035": {"imdbDirector": 0, "RGB": 0.0625, ...}
                    }
                }

            ``"reason"`` has one entry per feature in ``feature_weight_dict``.
        """
        self.recommend_for_each_feature(input_movieid_list, num_of_recommended_movies)

        combined_movieid_sim_counter = Counter()
        for feature in self.feature_weight_dict:
            combined_movieid_sim_counter += getattr(
                self, self._counter_attr_name(feature))

        combined_movieid_sim_counter = self.__language_filter(
            input_movieid_list, combined_movieid_sim_counter)
        final_recommended_movies_dict = dict(
            combined_movieid_sim_counter.most_common(num_of_recommended_movies))

        return {
            "movie": final_recommended_movies_dict,
            "reason": self.features_contribute_most(final_recommended_movies_dict),
        }
Exemple #5
0
class SimilarityRecommender(object):
    """Recommend movies or TV titles from weighted per-feature similarity.

    For every feature in ``feature_weight_dict`` a ``Counter`` mapping
    imdb id -> weighted similarity is computed and stored on the instance as
    ``self.<feature.lower()>_movieid_sim_counter``.  ``recommend`` sums those
    counters, applies a language filter for movies, and returns the top
    entries with the per-feature scores that produced them.
    """

    # Features scored with cosine similarity; every other feature is scored
    # by counting shared values.
    _COSINE_FEATURES = frozenset([
        'imdbMainactor',
        'imdbDirector',
        'imdbGenre',
        'locationCountry',
        'locationCity',
        'vionelScene',
        'RGB',
        'brightness',
        'imdbCreator',
    ])

    def __init__(self,
                 media_type='movie',
                 db_name='VionelDB',
                 collection_name='BoxerMovies',
                 hostname='192.168.1.80',
                 port=27017):
        """Pick the feature weights for ``media_type`` and open the store.

        Args:
            media_type: ``'movie'`` or ``'tv'``; selects the weight table.
            db_name, collection_name, hostname, port: forwarded unchanged, in
                this order, to ``MongoManager``.

        Raises:
            ValueError: if ``media_type`` is neither ``'movie'`` nor ``'tv'``.
        """
        if media_type == 'movie':
            # NOTE(review): provenance of the ratio constants is not visible
            # in this file; they are kept exactly as written.
            self.feature_weight_dict = {
                'imdbDirector': 0.7,
                'imdbGenre': 0.7 * 69325.4760905 / 223399.25309 / 3,
                'imdbKeyword': 0.5,
                'wikiKeyword': 0,
                'vionelTheme': 0.5,
                'vionelScene': 0.35,
                'locationCountry': 0.3,
                'locationCity': 0.5,
                'imdbMainactor': 0.7 * 215719.641732 / 223399.25309 / 3,
                'RGB': 0.25,
                'brightness': 0.25
            }
        elif media_type == 'tv':
            self.feature_weight_dict = {
                'imdbCreator': 1,
                'imdbGenre': 1,
                'imdbMainactor': 1,
                'imdbKeyword': 10
            }
        else:
            # An unknown type used to leave feature_weight_dict unset and
            # only fail later with an AttributeError; fail here instead,
            # before a connection is opened.
            raise ValueError(
                "unsupported media_type %r (expected 'movie' or 'tv')"
                % (media_type,))
        self.mongo_manager = MongoManager(db_name, collection_name, hostname,
                                          port)
        self.media_type = media_type

    def __del__(self):
        """Close the store connection, if one was ever opened."""
        # __init__ may have raised before mongo_manager was assigned.
        manager = getattr(self, 'mongo_manager', None)
        if manager is not None:
            manager.close()

    @staticmethod
    def _counter_attr_name(feature):
        """Return the instance attribute name holding ``feature``'s counter."""
        return "%s_movieid_sim_counter" % feature.lower()

    def __get_imdbid_feature_dict(self, feature_name):
        """Map imdb id -> stored value of ``feature_name``.

        Documents that do not carry ``feature_name`` are left out of the
        result.
        """
        projection = {"imdbId": 1, feature_name: 1, "_id": 0}
        result_dict = {}
        for movie in self.mongo_manager.exec_query({}, projection):
            imdbid = movie["imdbId"]
            try:
                result_dict[imdbid] = movie[feature_name]
            except KeyError:
                continue
        return result_dict

    def __get_imdbid_similarity_dict(self, movieid_list, recommended_by):
        """Score every stored title against ``movieid_list`` on one feature.

        Args:
            movieid_list: imdb ids of the input (liked) titles.
            recommended_by: feature name to compare on.

        Returns:
            dict mapping imdb id -> unweighted similarity.  Features in
            ``_COSINE_FEATURES`` use ``calculate_cosine``; all others get
            0.1 per shared value, capped at 1.
        """
        movieid_with_featureid_dict = self.__get_imdbid_feature_dict(
            recommended_by)
        result_dict = {}

        if recommended_by in self._COSINE_FEATURES:
            input_feature_counts = intersection_of_values_for_certain_keys(
                movieid_list, movieid_with_featureid_dict)
            # Built once: the input side does not change inside the loop.
            input_feature_ids = set(input_feature_counts.keys())

            for movieid, features in movieid_with_featureid_dict.items():
                if not input_feature_ids.intersection(features):
                    # Nothing in common: skip the cosine computation.
                    result_dict[movieid] = 0
                    continue
                candidate_feature_counts = intersection_of_values_for_certain_keys(
                    [movieid], movieid_with_featureid_dict)
                result_dict[movieid] = calculate_cosine(
                    input_feature_counts, candidate_feature_counts)
            return result_dict

        input_movie_features = set(union_of_values_for_spec_keys(
            movieid_list, movieid_with_featureid_dict))
        coefficient = 0.1
        for movieid, features in movieid_with_featureid_dict.items():
            shared = len(input_movie_features.intersection(features))
            score = shared * coefficient
            if score > 1:
                score = 1
            result_dict[movieid] = score
        return result_dict

    def __language_filter(self, input_movieid_list,
                          combined_movieid_sim_counter):
        """Drop candidates that share no language with the input titles.

        Candidates whose language is missing or empty are kept.  The counter
        is modified in place and also returned.
        """
        imdbid_language_dict = self.__get_imdbid_feature_dict("language")

        liked_languages = set()
        for movieid in input_movieid_list:
            liked_languages.update(imdbid_language_dict.get(movieid, ()))

        rejected = []
        for imdbid in combined_movieid_sim_counter:
            # .get(): titles without a stored language are absent from the
            # dict; indexing used to raise KeyError for them.
            language_list = imdbid_language_dict.get(imdbid)
            if not language_list:
                continue
            # No language in common: exclude this movie.
            if liked_languages.isdisjoint(language_list):
                rejected.append(imdbid)

        # Deleted after the loop so the counter is not resized mid-iteration.
        for imdbid in rejected:
            del combined_movieid_sim_counter[imdbid]

        return combined_movieid_sim_counter

    def __multiply_coefficient(self, movieid_score_counter, coefficient):
        """Return a new Counter with every score multiplied by ``coefficient``."""
        result_count = Counter()
        for movieid, score in movieid_score_counter.items():
            result_count[movieid] = score * coefficient
        return result_count

    def features_contribute_most(self, recommended_movies_dict):
        """Return, per recommended title, its feature scores, highest first.

        Requires ``recommend_for_each_feature`` to have run first, since it
        reads the per-feature counters stored on the instance.

        Returns:
            dict mapping imdb id -> ``OrderedDict`` of feature name ->
            weighted score, sorted by descending score.
        """
        movieid_featurescore_dict = {}
        for movieid in recommended_movies_dict:
            feature_score_dict = {}
            for feature in self.feature_weight_dict:
                # getattr instead of exec: ids are data, not source code.
                counter = getattr(self, self._counter_attr_name(feature))
                feature_score_dict[feature] = counter[movieid]
            movieid_featurescore_dict[movieid] = OrderedDict(
                sorted(feature_score_dict.items(),
                       key=lambda t: t[1],
                       reverse=True))
        return movieid_featurescore_dict

    def recommend_for_each_feature(self, input_movieid_list,
                                   num_of_recommended_movies):
        """Compute and store one weighted similarity Counter per feature.

        Each counter is stored as ``self.<feature.lower()>_movieid_sim_counter``
        (for example ``self.imdbdirector_movieid_sim_counter``) with the input
        titles removed.  ``num_of_recommended_movies`` is accepted for
        interface compatibility and is not used here.
        """
        for feature in self.feature_weight_dict:
            similarity = self.__get_imdbid_similarity_dict(
                input_movieid_list, feature)
            weighted = self.__multiply_coefficient(
                Counter(similarity), self.feature_weight_dict[feature])

            # Counter's deletion does not raise for ids that are absent.
            for movieid in input_movieid_list:
                del weighted[movieid]

            setattr(self, self._counter_attr_name(feature), weighted)

    def recommend(self, input_movie, num_of_recommended_movies):
        """Return recommended titles and the per-feature scores behind them.

        Args:
            input_movie: imdb id of the title to recommend from.
            num_of_recommended_movies: how many top-scoring titles to return.

        Returns:
            A dict of this shape (values are illustrative)::

                {
                    "movie": {"tt1124035": 0.96, "tt0340855": 0.68},
                    "reason": {
                        "tt1124035": {"vionelScene": 0.35, "RGB": 0.0625, ...},
                        "tt0340855": {"locationCountry": 0.21, "RGB": 0.0625, ...}
                    }
                }

            ``"movie"`` is an ``OrderedDict`` sorted by descending score.

        Raises:
            AssertionError: if the store returns nothing for ``input_movie``.
        """
        # Explicit raise instead of ``assert`` so the check still runs under
        # ``python -O``; the exception type callers see is unchanged.
        if not self.mongo_manager.exec_query({'imdbId': input_movie}, {}):
            raise AssertionError(
                "imdbId %r was not found in the store" % (input_movie,))

        input_movieid_list = [input_movie]
        self.recommend_for_each_feature(input_movieid_list,
                                        num_of_recommended_movies)

        combined_movieid_sim_counter = Counter()
        for feature in self.feature_weight_dict:
            combined_movieid_sim_counter += getattr(
                self, self._counter_attr_name(feature))

        if self.media_type == 'movie':
            # The language filter is applied to movies only.
            combined_movieid_sim_counter = self.__language_filter(
                input_movieid_list, combined_movieid_sim_counter)

        # most_common already yields (id, score) pairs in descending score
        # order, so no dict round-trip and re-sort is needed.
        final_ordered_recommended_movies_dict = OrderedDict(
            combined_movieid_sim_counter.most_common(
                num_of_recommended_movies))

        movieid_featurewithscore_dict = self.features_contribute_most(
            final_ordered_recommended_movies_dict)

        return {
            "movie": final_ordered_recommended_movies_dict,
            "reason": movieid_featurewithscore_dict,
        }