コード例 #1
0
def collaborative_filtering(is_test):
    """Run the collaborative-filtering recommender.

    When *is_test* is truthy, score the recommendations with MAP@5;
    otherwise write them to 'CollaborativeFiltering.csv'.
    """
    print('*** Test Collaborative Filtering Recommender ***')

    builder = Builder()
    evaluator = Evaluator()
    evaluator.split()

    # User-content KNN similarity matrix (500 neighbours).
    similarity_ucm = builder.get_S_UCM_KNN(
        builder.get_UCM(builder.get_URM()), 500)

    recommender = CollaborativeFilteringRec.CollaborativeFilteringRec()
    recommender.fit(evaluator.get_URM_train(),
                    evaluator.get_target_playlists(),
                    evaluator.get_target_tracks(),
                    evaluator.num_playlists_to_test,
                    similarity_ucm,
                    True)

    predictions = recommender.recommend()

    if not is_test:
        print('Prediction saved!')
        predictions.to_csv('CollaborativeFiltering.csv', sep=',', index=False)
        return

    print('CollaborativeFiltering MAP@5:', evaluator.map5(predictions))
コード例 #2
0
def item_user_avg(is_test):
    """Run the item/user average recommender.

    When *is_test* is truthy, score the recommendations with MAP@5;
    otherwise write them to 'ItemUserAvg.csv'.
    """
    print('*** Test Item User Avg Recommender ***')

    builder = Builder()
    evaluator = Evaluator()
    evaluator.split()

    # Item-content (250 neighbours) and user-content (500 neighbours)
    # KNN similarity matrices.
    similarity_icm = builder.build_S_ICM_knn(builder.build_ICM(), 250)
    similarity_ucm = builder.get_S_UCM_KNN(
        builder.get_UCM(builder.get_URM()), 500)

    recommender = ItemUserAvgRec.ItemUserAvgRec()
    recommender.fit(evaluator.get_URM_train(),
                    evaluator.get_target_playlists(),
                    evaluator.get_target_tracks(),
                    evaluator.num_playlists_to_test,
                    similarity_icm,
                    similarity_ucm,
                    True,
                    0.80)

    predictions = recommender.recommend()

    if not is_test:
        print('Prediction saved!')
        predictions.to_csv('ItemUserAvg.csv', sep=',', index=False)
        return

    print('ItemUserAvg MAP@5:', evaluator.map5(predictions))
コード例 #3
0
 def get_URM_train(self):
     """Return the URM from a fresh Builder.

     NOTE(review): despite the name, this returns the full URM from
     Builder.get_URM(), not a train split — confirm against callers.
     """
     return Builder().get_URM()
コード例 #4
0
def hybrid_repo(is_test):
    """Tune and run the hybrid recommender.

    Bayesian-searches hyperparameters for an item-based KNN CF model (the
    CBF model re-uses the same search space), fits P3alpha, then blends the
    three similarity matrices inside HybridRec.

    Returns the MAP@5 score when *is_test* is True; otherwise writes the
    recommendations to ``<repo root>/all/sub.csv`` and returns 0.
    """
    b = Builder()
    ev = Evaluator()
    ev.split()
    ICM = b.build_ICM()

    # Hold out 20% of interactions for testing, then 10% of the remaining
    # training data for validation.
    URM_train, URM_test = train_test_holdout(b.get_URM(), train_perc=0.8)
    URM_train, URM_validation = train_test_holdout(URM_train, train_perc=0.9)

    from ParameterTuning.AbstractClassSearch import EvaluatorWrapper
    from Base.Evaluation.Evaluator import SequentialEvaluator

    evaluator_validation = SequentialEvaluator(URM_validation, cutoff_list=[5])
    evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[5, 10])

    evaluator_validation = EvaluatorWrapper(evaluator_validation)
    evaluator_test = EvaluatorWrapper(evaluator_test)

    from KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
    # Was missing: ItemKNNCBFRecommender is instantiated below but was
    # never imported, which raised a NameError at runtime.
    from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender
    from ParameterTuning.BayesianSearch import BayesianSearch

    recommender_class = ItemKNNCFRecommender

    parameterSearch = BayesianSearch(recommender_class,
                                     evaluator_validation=evaluator_validation,
                                     evaluator_test=evaluator_test)

    from ParameterTuning.AbstractClassSearch import DictionaryKeys

    # Search space shared by the CF and CBF searches below.
    hyperparamethers_range_dictionary = {
        "topK": [5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800],
        "shrink": [0, 10, 50, 100, 200, 300, 500, 1000],
        "similarity": ["cosine"],
        "normalize": [True, False],
    }

    recommenderDictionary = {
        DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
        DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
        DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
        DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
        DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
        hyperparamethers_range_dictionary
    }

    output_root_path = "result_experiments/"

    import os

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    output_root_path += recommender_class.RECOMMENDER_NAME

    n_cases = 2
    metric_to_optimize = "MAP"

    best_parameters = parameterSearch.search(recommenderDictionary,
                                             n_cases=n_cases,
                                             output_root_path=output_root_path,
                                             metric=metric_to_optimize)

    itemKNNCF = ItemKNNCFRecommender(URM_train)
    itemKNNCF.fit(**best_parameters)

    # NOTE(review): the CBF search re-uses the CF search space AND the CF
    # recommenderDictionary (constructor args = [URM_train] only) — confirm
    # this is intentional.
    best_parameters_ItemKNNCBF = parameterSearch.search(
        recommenderDictionary,
        n_cases=n_cases,
        output_root_path=output_root_path,
        metric=metric_to_optimize)

    itemKNNCBF = ItemKNNCBFRecommender(ICM, URM_train)
    itemKNNCBF.fit(**best_parameters_ItemKNNCBF)

    from GraphBased.P3alphaRecommender import P3alphaRecommender

    P3alpha = P3alphaRecommender(URM_train)
    P3alpha.fit()

    # Blend the three similarity matrices inside the hybrid recommender.
    rec = HybridRec.HybridRec()
    rec.fit(ev.get_URM_train(),
            ev.get_target_playlists(),
            ev.get_target_tracks(),
            ev.num_playlists_to_test,
            itemKNNCBF.W_sparse,
            itemKNNCF.W_sparse,
            P3alpha.W_sparse,
            is_test=True,
            alfa=0.7,
            avg=0.3)

    train_df = rec.recommend()

    if is_test:
        map5 = ev.map5(train_df)
        # ev.map5 evaluates at cutoff 5, so label the score MAP@5
        # (the previous label wrongly said MAP@10).
        print('Hybrid MAP@5:', map5)
        return map5
    else:
        print('Prediction saved!')
        # [:-19] strips the trailing path components of this file's
        # directory to reach the repository root — fragile; depends on the
        # on-disk layout.
        train_df.to_csv(os.path.dirname(os.path.realpath(__file__))[:-19] +
                        "/all/sub.csv",
                        sep=',',
                        index=False)
        return 0
コード例 #5
0
class Evaluator(object):
    """Builds a train/test split of the playlist data set and scores MAP@5.

    split() removes 5 random tracks from each of a random 20% of the
    playlists that have at least 10 tracks; the removed tracks become the
    test set and everything else becomes the training URM.
    """

    def __init__(self):
        self.b = Builder()
        self.URM_train = None          # csr_matrix built by split()
        self.test_df = None            # DataFrame: playlist_id -> held-out track_ids
        self.target_playlists = None   # sorted playlist ids selected for testing
        self.target_tracks = None      # sorted unique held-out track ids
        self.num_playlists_to_test = 10000  # overwritten by split()

    def get_URM_train(self):
        return self.URM_train

    def get_test_df(self):
        return self.test_df

    def get_target_playlists(self):
        return self.target_playlists

    def get_target_tracks(self):
        return self.target_tracks

    def split(self):
        """
        Splits the dataset into training and test set.
        Builds the URM train csr matrix and the test dataframe in a
        submission-like structure.
        """

        print('Splitting the dataset...')

        # Load the original data set and group each playlist's tracks
        # into a list.
        URM_df = self.b.get_train_final()
        grouped = URM_df.groupby(
            'playlist_id', as_index=True).apply(lambda x: list(x['track_id']))
        grouped.sort_index(inplace=True)

        # Test 20% of the playlists.
        self.num_playlists_to_test = int(self.b.get_URM().shape[0] * 0.20)

        # Only playlists with >= 10 tracks are testable: 5 tracks are
        # removed, so at least 5 remain for training.
        testable_idx = grouped[[len(x) >= 10 for x in grouped]].index
        test_idx = np.random.choice(testable_idx,
                                    self.num_playlists_to_test,
                                    replace=False)
        test_idx.sort()
        self.target_playlists = test_idx

        # Extract the test set portion of the data set.
        test_mask = grouped[test_idx]
        test_mask.sort_index(inplace=True)

        # Randomly move 5 tracks of each tested playlist into the test set.
        # tracks.remove(...) mutates the lists held by `grouped`, so the
        # held-out tracks are excluded from URM_train below.
        test_df_list = []
        for playlist_id, tracks in zip(test_idx, test_mask):
            held_out = np.random.choice(tracks, 5, replace=False)
            test_df_list.append([playlist_id, held_out])
            for track in held_out:
                tracks.remove(track)

        # Build test_df and URM_train.
        self.test_df = pd.DataFrame(test_df_list,
                                    columns=['playlist_id', 'track_ids'])

        URM_train_matrix = MultiLabelBinarizer(
            classes=self.b.get_tracks(),
            sparse_output=True).fit_transform(grouped)
        self.URM_train = URM_train_matrix.tocsr()

        # Target tracks: sorted unique held-out tracks.
        t_list = [t for sub in self.test_df['track_ids'] for t in sub]
        self.target_tracks = sorted(set(t_list))

    def ap(self, recommended_items, relevant_items):
        """
        Compute AP = Average Precision of `recommended_items` with respect
        to `relevant_items`, truncated at len(recommended_items).
        """

        # np.in1d is deprecated; np.isin is the modern equivalent and
        # preserves the 1-D shape of recommended_items.
        is_relevant = np.isin(recommended_items,
                              relevant_items,
                              assume_unique=True)

        # Cumulative sum: precision at 1, at 2, at 3 ...
        p_at_k = is_relevant * np.cumsum(is_relevant, dtype=np.float32) / (
            1 + np.arange(is_relevant.shape[0]))

        # Normalise by the best achievable number of hits.
        map_score = np.sum(p_at_k) / min(len(relevant_items),
                                         len(recommended_items))

        return map_score

    def map5(self, train_df):
        """
        Compute MAP@5 on train_dataframe with known
        results in the test_dataframe.

        Both dataframes must be aligned row-by-row on playlist_id.
        """
        # pd.DataFrame.as_matrix() was removed in pandas 1.0;
        # Series.to_numpy() is the supported replacement.
        train_matrix = train_df['track_ids'].to_numpy()
        test_matrix = self.test_df['track_ids'].to_numpy()

        total = 0.0
        for i in range(self.num_playlists_to_test):
            total += self.ap(train_matrix[i], test_matrix[i])

        return total / self.num_playlists_to_test