def collaborative_filtering(is_test):
    """Run the collaborative-filtering recommender.

    When is_test is True, prints the MAP@5 score on the local split;
    otherwise writes the predictions to 'CollaborativeFiltering.csv'.
    """
    print('*** Test Collaborative Filtering Recommender ***')

    builder = Builder()
    evaluator = Evaluator()
    evaluator.split()

    recommender = CollaborativeFilteringRec.CollaborativeFilteringRec()

    # User-based similarity: KNN (k=500) over the UCM built from the full URM.
    S_UCM = builder.get_S_UCM_KNN(builder.get_UCM(builder.get_URM()), 500)

    recommender.fit(evaluator.get_URM_train(),
                    evaluator.get_target_playlists(),
                    evaluator.get_target_tracks(),
                    evaluator.num_playlists_to_test,
                    S_UCM,
                    True)

    train_df = recommender.recommend()

    if is_test:
        map5 = evaluator.map5(train_df)
        print('CollaborativeFiltering MAP@5:', map5)
    else:
        print('Prediction saved!')
        train_df.to_csv('CollaborativeFiltering.csv', sep=',', index=False)
def item_user_avg(is_test):
    """Run the item/user averaged recommender.

    When is_test is True, prints the MAP@5 score on the local split;
    otherwise writes the predictions to 'ItemUserAvg.csv'.
    """
    print('*** Test Item User Avg Recommender ***')

    builder = Builder()
    evaluator = Evaluator()
    evaluator.split()

    recommender = ItemUserAvgRec.ItemUserAvgRec()

    # Item-based (k=250) and user-based (k=500) similarity matrices.
    S_ICM = builder.build_S_ICM_knn(builder.build_ICM(), 250)
    S_UCM = builder.get_S_UCM_KNN(builder.get_UCM(builder.get_URM()), 500)

    recommender.fit(evaluator.get_URM_train(),
                    evaluator.get_target_playlists(),
                    evaluator.get_target_tracks(),
                    evaluator.num_playlists_to_test,
                    S_ICM,
                    S_UCM,
                    True,
                    0.80)

    train_df = recommender.recommend()

    if is_test:
        map5 = evaluator.map5(train_df)
        print('ItemUserAvg MAP@5:', map5)
    else:
        print('Prediction saved!')
        train_df.to_csv('ItemUserAvg.csv', sep=',', index=False)
def get_URM_train(self):
    """Return the full URM from a fresh Builder (no split is applied here)."""
    return Builder().get_URM()
def hybrid_repo(is_test): b = Builder() ev = Evaluator() ev.split() ICM = b.build_ICM() URM_train, URM_test = train_test_holdout(b.get_URM(), train_perc=0.8) URM_train, URM_validation = train_test_holdout(URM_train, train_perc=0.9) from ParameterTuning.AbstractClassSearch import EvaluatorWrapper from Base.Evaluation.Evaluator import SequentialEvaluator evaluator_validation = SequentialEvaluator(URM_validation, cutoff_list=[5]) evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[5, 10]) evaluator_validation = EvaluatorWrapper(evaluator_validation) evaluator_test = EvaluatorWrapper(evaluator_test) from KNN.ItemKNNCFRecommender import ItemKNNCFRecommender from ParameterTuning.BayesianSearch import BayesianSearch recommender_class = ItemKNNCFRecommender parameterSearch = BayesianSearch(recommender_class, evaluator_validation=evaluator_validation, evaluator_test=evaluator_test) from ParameterTuning.AbstractClassSearch import DictionaryKeys hyperparamethers_range_dictionary = {} hyperparamethers_range_dictionary["topK"] = [ 5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800 ] hyperparamethers_range_dictionary["shrink"] = [ 0, 10, 50, 100, 200, 300, 500, 1000 ] hyperparamethers_range_dictionary["similarity"] = ["cosine"] hyperparamethers_range_dictionary["normalize"] = [True, False] recommenderDictionary = { DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train], DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {}, DictionaryKeys.FIT_POSITIONAL_ARGS: dict(), DictionaryKeys.FIT_KEYWORD_ARGS: dict(), DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary } output_root_path = "result_experiments/" import os # If directory does not exist, create if not os.path.exists(output_root_path): os.makedirs(output_root_path) output_root_path += recommender_class.RECOMMENDER_NAME n_cases = 2 metric_to_optimize = "MAP" best_parameters = parameterSearch.search(recommenderDictionary, n_cases=n_cases, output_root_path=output_root_path, metric=metric_to_optimize) 
itemKNNCF = ItemKNNCFRecommender(URM_train) itemKNNCF.fit(**best_parameters) from FW_Similarity.CFW_D_Similarity_Linalg import CFW_D_Similarity_Linalg n_cases = 2 metric_to_optimize = "MAP" best_parameters_ItemKNNCBF = parameterSearch.search( recommenderDictionary, n_cases=n_cases, output_root_path=output_root_path, metric=metric_to_optimize) itemKNNCBF = ItemKNNCBFRecommender(ICM, URM_train) itemKNNCBF.fit(**best_parameters_ItemKNNCBF) """ #_____________________________________________________________________ from ParameterTuning.BayesianSearch import BayesianSearch from ParameterTuning.AbstractClassSearch import DictionaryKeys from ParameterTuning.AbstractClassSearch import EvaluatorWrapper evaluator_validation_tuning = EvaluatorWrapper(evaluator_validation) evaluator_test_tuning = EvaluatorWrapper(evaluator_test) recommender_class = CFW_D_Similarity_Linalg parameterSearch = BayesianSearch(recommender_class, evaluator_validation=evaluator_validation_tuning, evaluator_test=evaluator_test_tuning) hyperparamethers_range_dictionary = {} hyperparamethers_range_dictionary["topK"] = [5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800] hyperparamethers_range_dictionary["add_zeros_quota"] = range(0, 1) hyperparamethers_range_dictionary["normalize_similarity"] = [True, False] recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train, ICM, itemKNNCF.W_sparse], DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {}, DictionaryKeys.FIT_POSITIONAL_ARGS: dict(), DictionaryKeys.FIT_KEYWORD_ARGS: dict(), DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary} output_root_path = "result_experiments/" import os # If directory does not exist, create if not os.path.exists(output_root_path): os.makedirs(output_root_path) output_root_path += recommender_class.RECOMMENDER_NAME n_cases = 2 metric_to_optimize = "MAP" best_parameters_CFW_D = parameterSearch.search(recommenderDictionary, n_cases=n_cases, output_root_path=output_root_path, 
metric=metric_to_optimize) CFW_weithing = CFW_D_Similarity_Linalg(URM_train, ICM, itemKNNCF.W_sparse) CFW_weithing.fit(**best_parameters_CFW_D) #___________________________________________________________________________________________- """ from GraphBased.P3alphaRecommender import P3alphaRecommender P3alpha = P3alphaRecommender(URM_train) P3alpha.fit() from MatrixFactorization.PureSVD import PureSVDRecommender #pureSVD = PureSVDRecommender(URM_train) #pureSVD.fit() rec = HybridRec.HybridRec() S_UCM = b.get_S_UCM_KNN(b.get_UCM(ev.get_URM_train()), 600) S_ICM = b.build_S_ICM_knn(b.build_ICM(), 250) rec.fit(ev.get_URM_train(), ev.get_target_playlists(), ev.get_target_tracks(), ev.num_playlists_to_test, itemKNNCBF.W_sparse, itemKNNCF.W_sparse, P3alpha.W_sparse, is_test=True, alfa=0.7, avg=0.3) train_df = rec.recommend() if is_test: map5 = ev.map5(train_df) print('Hybrid MAP@10:', map5) return map5 else: print('Prediction saved!') train_df.to_csv(os.path.dirname(os.path.realpath(__file__))[:-19] + "/all/sub.csv", sep=',', index=False) return 0 #hybridrecommender = ItemKNNSimilarityHybridRecommender(URM_train, itemKNNCF.W_sparse, P3alpha.W_sparse) #hybridrecommender.fit(alpha=0.5) #print(evaluator_validation.evaluateRecommender(hybridrecommender)) """
class Evaluator(object):
    """Builds a local train/test split of the playlist-track data set and
    computes MAP@5 over the held-out tracks."""

    def __init__(self):
        self.b = Builder()
        self.URM_train = None            # csr matrix of training interactions
        self.test_df = None              # held-out tracks, submission-like frame
        self.target_playlists = None     # playlists selected for testing
        self.target_tracks = None        # unique tracks moved to the test set
        self.num_playlists_to_test = 10000

    def get_URM_train(self):
        return self.URM_train

    def get_test_df(self):
        return self.test_df

    def get_target_playlists(self):
        return self.target_playlists

    def get_target_tracks(self):
        return self.target_tracks

    def split(self):
        """
        Splits the dataset into training and test set.
        Builds the URM train csr matrix and the test dataframe
        in a submission-like structure.
        """
        print('Splitting the dataset...')

        # Load the original data set and group by playlist
        URM_df = self.b.get_train_final()
        grouped = URM_df.groupby(
            'playlist_id', as_index=True).apply(lambda x: list(x['track_id']))
        grouped.sort_index(inplace=True)

        # Set num_playlist_to_test: 20% of all playlists
        self.num_playlists_to_test = int(self.b.get_URM().shape[0] * 0.20)

        # Only playlists with at least 10 tracks are testable (5 are removed)
        testable_idx = grouped[[len(x) >= 10 for x in grouped]].index
        test_idx = np.random.choice(testable_idx, self.num_playlists_to_test,
                                    replace=False)
        test_idx.sort()
        self.target_playlists = test_idx

        # Extract the test set portion of the data set
        test_mask = grouped[test_idx]
        test_mask.sort_index(inplace=True)

        # Iterate over the test set to randomly remove 5 tracks from each playlist
        test_df_list = []
        i = 0
        for t in test_mask:
            t_tracks_to_test = np.random.choice(t, 5, replace=False)
            test_df_list.append([test_idx[i], t_tracks_to_test])
            for tt in t_tracks_to_test:
                t.remove(tt)
            i += 1

        # Build test_df and URM_train
        self.test_df = pd.DataFrame(test_df_list,
                                    columns=['playlist_id', 'track_ids'])
        URM_train_matrix = MultiLabelBinarizer(
            classes=self.b.get_tracks(), sparse_output=True).fit_transform(grouped)
        self.URM_train = URM_train_matrix.tocsr()

        # Set target tracks: unique, sorted tracks appearing in the test set
        t_list = [t for sub in self.test_df['track_ids'] for t in sub]
        t_list_unique = list(set(t_list))
        t_list_unique.sort()
        self.target_tracks = t_list_unique

    def ap(self, recommended_items, relevant_items):
        """
        Compute AP = Average Precision.

        Both arguments are numpy arrays; recommended_items is assumed to
        contain no duplicates (np.in1d is called with assume_unique=True).
        """
        is_relevant = np.in1d(recommended_items, relevant_items,
                              assume_unique=True)

        # Cumulative sum: precision at 1, at 2, at 3 ...
        p_at_k = is_relevant * np.cumsum(is_relevant, dtype=np.float32) / (
            1 + np.arange(is_relevant.shape[0]))

        map_score = np.sum(p_at_k) / np.min(
            [relevant_items.shape[0], is_relevant.shape[0]])
        return map_score

    def map5(self, train_df):
        """
        Compute MAP@5 on train_df against the known results in self.test_df.

        Both dataframes must expose a 'track_ids' column aligned row-by-row
        over the first num_playlists_to_test playlists.
        """
        map5 = 0
        # pd.DataFrame.as_matrix was removed in pandas 1.0; use to_numpy().
        train_matrix = train_df['track_ids'].to_numpy()
        test_matrix = self.test_df['track_ids'].to_numpy()

        for i in range(0, self.num_playlists_to_test):
            map5 = map5 + self.ap(train_matrix[i], test_matrix[i])

        return map5 / self.num_playlists_to_test