def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK_bpr=200, l1_ratio=0.1,
        topK_elasticNet=300, alpha_elasticNet=0.0002, sgd_mode='sgd'):
    """Fit every component recommender of the hybrid on ``train_data``.

    The method stores the matrix returned by ``buildURMMatrix`` in ``self.urm``,
    caches the 20 most frequent ``track_id`` values in ``self.top_pop_songs``,
    fits the item-item, user-user and content-based recommenders, creates and
    fits the two SLIM models, and finally fits ``self.als_recommender`` on the
    URM. ``self.als_recommender`` and the three neighbourhood recommenders are
    not created here; they must already exist on the instance.

    Args:
        train_data: Training interactions; must expose a ``track_id`` column.
        lambda_i: Forwarded to ``SLIM_BPR_Cython.fit`` as ``lambda_i``.
        lambda_j: Forwarded to ``SLIM_BPR_Cython.fit`` as ``lambda_j``.
        topK_bpr: Forwarded to ``SLIM_BPR_Cython.fit`` as ``topK``.
        l1_ratio: Forwarded to ``SLIMElasticNetRecommender.fit`` as ``l1_ratio``.
        topK_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit`` as ``topK``.
        alpha_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit`` as ``alpha``.
        sgd_mode: Forwarded to ``SLIM_BPR_Cython.fit`` as ``sgd_mode``.
    """
    print('Fitting...')

    self.urm = buildURMMatrix(train_data)

    # Most frequent tracks, kept as a fallback list for recommendation time.
    track_frequencies = train_data['track_id'].value_counts()
    self.top_pop_songs = track_frequencies.head(20).index.values

    # Recommenders that were built beforehand and only need the raw data.
    self.col_i_i_recommender.fit(train_data)
    self.col_u_u_recommender.fit(train_data)
    self.cbf_recommender.fit(train_data)

    # SLIM BPR is rebuilt from scratch on every call.
    bpr_settings = dict(lambda_i=lambda_i, lambda_j=lambda_j,
                        topK=topK_bpr, sgd_mode=sgd_mode)
    self.slim_recommender = SLIM_BPR_Cython(train_data)
    self.slim_recommender.fit(**bpr_settings)

    # SLIM ElasticNet is rebuilt from scratch as well.
    elastic_net_settings = dict(l1_ratio=l1_ratio, topK=topK_elasticNet,
                                alpha=alpha_elasticNet)
    self.slim_elasticNet_recommender = SLIMElasticNetRecommender(train_data)
    self.slim_elasticNet_recommender.fit(**elastic_net_settings)

    # Unlike the others, ALS is fitted on the URM rather than the raw data.
    self.als_recommender.fit(self.urm)
def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK=200, sgd_mode='sgd'):
    """Fit the item-item, content-based and SLIM BPR recommenders.

    Stores the matrix returned by ``buildURMMatrix`` in ``self.urm`` and the 20
    most frequent ``track_id`` values in ``self.top_pop_songs`` before fitting.
    ``self.col_i_i_recommender`` and ``self.cbf_recommender`` must already
    exist on the instance; the SLIM BPR model is created here.

    Args:
        train_data: Training interactions; must expose a ``track_id`` column.
        lambda_i: Forwarded to ``SLIM_BPR_Cython.fit`` as ``lambda_i``.
        lambda_j: Forwarded to ``SLIM_BPR_Cython.fit`` as ``lambda_j``.
        topK: Forwarded to ``SLIM_BPR_Cython.fit`` as ``topK``.
        sgd_mode: Forwarded to ``SLIM_BPR_Cython.fit`` as ``sgd_mode``.
    """
    print('Fitting...')

    self.urm = buildURMMatrix(train_data)

    # Most frequent tracks, kept as a fallback list for recommendation time.
    track_frequencies = train_data['track_id'].value_counts()
    self.top_pop_songs = track_frequencies.head(20).index.values

    self.col_i_i_recommender.fit(train_data)
    self.cbf_recommender.fit(train_data)

    bpr_settings = dict(lambda_i=lambda_i, lambda_j=lambda_j,
                        topK=topK, sgd_mode=sgd_mode)
    self.slim_recommender = SLIM_BPR_Cython(train_data)
    self.slim_recommender.fit(**bpr_settings)
import time
from parameterTunning.AbstractClassSearch import DictionaryKeys
from loader.loader import save_dataframe, train_data, target_data, test_data, tracks_data
from utils.auxUtils import Evaluator, buildURMMatrix, filter_seen
import pandas as pd
from slimRS.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
import matplotlib.pyplot as plt
from parameterTunning.GridSearch import GridSearch
from sklearn.model_selection import GridSearchCV

# Grid search over the SLIM BPR hyper-parameters using scikit-learn's GridSearchCV.
URM_train = buildURMMatrix(train_data)
URM_test = buildURMMatrix(test_data)

rs = SLIM_BPR_Cython(train_data)

# Candidate values explored for each hyper-parameter.
grid_param = {
    'lambda_i': [1e-1, 1e-2, 1e-3, 1e-4],
    'lambda_j': [1e-1, 1e-2, 1e-3, 1e-4],
    'topK': [300, 400, 500],
}

evaluator = Evaluator()


def _score_candidate(estimator, X, y=None):
    """Score one fitted candidate for GridSearchCV.

    GridSearchCV requires ``scoring`` to be a scorer name or a callable with the
    signature ``(estimator, X, y)``. Evaluating here, instead of once at
    construction time, means every candidate is scored after it has been fitted.
    ``X`` and ``y`` are ignored: the recommendations are always produced for the
    target playlists and compared against ``test_data``.
    """
    recommendations = estimator.recommend(target_data["playlist_id"])
    return evaluator.evaluate(recommendations, test_data)


# NOTE(review): GridSearchCV also needs the estimator to implement
# get_params/set_params and a fit(X) signature; confirm SLIM_BPR_Cython does.
gd_sr = GridSearchCV(estimator=rs,
                     param_grid=grid_param,
                     scoring=_score_candidate,
                     n_jobs=2)
gd_sr.fit(URM_train)
class HybridRS:
    """Hybrid recommender blending several models into one ranking.

    The final score of a playlist row is a weighted sum of the estimated ratings
    of a content-based model, item-item and user-user collaborative models,
    SLIM BPR, SLIM ElasticNet and a PureSVD model.
    """

    # Class-level placeholder; it is not read anywhere inside this class.
    train_data = pd.DataFrame()

    def __init__(self, tracks_data, at=10, k_cbf=35, shrinkage_cbf=150, k_i_i=700, shrinkage_i_i=200,
                 k_u_u=200, shrinkage_u_u=50, similarity='cosine', tf_idf=True):
        """Store the hyper-parameters and build the neighbourhood recommenders.

        Args:
            tracks_data: Track metadata handed to ``CbfRS``.
            at: Number of tracks to recommend per playlist.
            k_cbf: Second positional argument after ``at`` for ``CbfRS``.
            shrinkage_cbf: Third positional argument after ``at`` for ``CbfRS``.
            k_i_i: Second positional argument for ``ColBfIIRS``.
            shrinkage_i_i: Third positional argument for ``ColBfIIRS``.
            k_u_u: Second positional argument for ``ColBfUURS``.
            shrinkage_u_u: Third positional argument for ``ColBfUURS``.
            similarity: Stored on the instance; not forwarded to any component here.
            tf_idf: Forwarded as ``tf_idf`` to the two collaborative recommenders
                (not to ``CbfRS`` in this class).
        """
        self.k_cbf = k_cbf
        self.k_i_i = k_i_i
        self.k_u_u = k_u_u
        self.at = at
        self.shrinkage_cbf = shrinkage_cbf
        self.shrinkage_i_i = shrinkage_i_i
        self.shrinkage_u_u = shrinkage_u_u
        self.similarity = similarity
        self.tf_idf = tf_idf
        self.cbf_recommender = CbfRS(tracks_data, self.at, self.k_cbf, self.shrinkage_cbf)
        self.col_i_i_recommender = ColBfIIRS(self.at, self.k_i_i, self.shrinkage_i_i, tf_idf=self.tf_idf)
        self.col_u_u_recommender = ColBfUURS(self.at, self.k_u_u, self.shrinkage_u_u, tf_idf=self.tf_idf)

    def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK_bpr=200, l1_ratio=0.1,
            topK_elasticNet=300, alpha_elasticNet=0.0002, sgd_mode='sgd'):
        """Fit every component recommender on ``train_data``.

        Args:
            train_data: Training interactions; must expose a ``track_id`` column.
            lambda_i: Forwarded to ``SLIM_BPR_Cython.fit`` as ``lambda_i``.
            lambda_j: Forwarded to ``SLIM_BPR_Cython.fit`` as ``lambda_j``.
            topK_bpr: Forwarded to ``SLIM_BPR_Cython.fit`` as ``topK``.
            l1_ratio: Forwarded to ``SLIMElasticNetRecommender.fit`` as ``l1_ratio``.
            topK_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit`` as ``topK``.
            alpha_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit`` as ``alpha``.
            sgd_mode: Forwarded to ``SLIM_BPR_Cython.fit`` as ``sgd_mode``.
        """
        print('Fitting...')
        self.urm = buildURMMatrix(train_data)
        # The 20 most frequent tracks are appended to every candidate list later on.
        self.top_pop_songs = train_data['track_id'].value_counts().head(20).index.values
        self.col_i_i_recommender.fit(train_data)
        self.col_u_u_recommender.fit(train_data)
        self.cbf_recommender.fit(train_data)
        self.slim_recommender = SLIM_BPR_Cython(train_data)
        self.slim_recommender.fit(lambda_i=lambda_i, lambda_j=lambda_j, topK=topK_bpr, sgd_mode=sgd_mode)
        self.slim_elasticNet_recommender = SLIMElasticNetRecommender(train_data)
        self.slim_elasticNet_recommender.fit(l1_ratio=l1_ratio, topK=topK_elasticNet, alpha=alpha_elasticNet)
        self.pureSVD = PureSVDRecommender(train_data)
        self.pureSVD.fit()

    def recommend(self, playlist_ids, alpha=0.1, beta=1, gamma=1, delta=2, omega=30, eta=0.8,
                  filter_top_pop=False):
        """Recommend up to ``self.at`` tracks for each playlist id.

        Args:
            playlist_ids: Iterable of playlist ids used to index the rating matrices.
            alpha: Weight of the user-user collaborative estimated ratings.
            beta: Weight of the item-item collaborative estimated ratings.
            gamma: Weight of the content-based estimated ratings.
            delta: Weight of the SLIM BPR estimated ratings.
            omega: Weight of the SLIM ElasticNet estimated ratings.
            eta: Weight of the PureSVD score row.
            filter_top_pop: When true, the result of ``filter_seen`` is additionally
                passed through ``filter_seen_array`` together with the top-popular tracks.

        Returns:
            A DataFrame with columns ``playlist_id`` and ``track_ids``; ``track_ids``
            is a space-separated string. Playlists that raise ``IndexError`` are
            reported on stdout and left out of the result.
        """
        print("Recommending... Am I filtering top_top songs?", filter_top_pop)
        final_prediction = {}

        # e_r_ stands for estimated rating
        e_r_cbf = self.cbf_recommender.get_estimated_ratings()
        e_r_col_i_i = self.col_i_i_recommender.get_estimated_ratings()
        e_r_col_u_u = self.col_u_u_recommender.get_estimated_ratings()
        e_r_slim_bpr = self.slim_recommender.get_estimated_ratings()
        e_r_slim_elasticNet = self.slim_elasticNet_recommender.get_estimated_ratings()

        estimated_ratings_aux1 = e_r_col_u_u.multiply(alpha) + e_r_col_i_i.multiply(beta) + e_r_cbf.multiply(gamma)
        estimated_ratings_aux2 = estimated_ratings_aux1 + e_r_slim_bpr.multiply(delta)
        estimated_ratings_final = estimated_ratings_aux2 + e_r_slim_elasticNet.multiply(omega)
        print('after sum..')

        for counter, k in enumerate(playlist_ids):
            try:
                row = estimated_ratings_final[k]
                # Add the weighted PureSVD scores for this playlist to the blended row.
                mf_row = sparse.csr_matrix(self.pureSVD.compute_score_SVD(k)).multiply(eta)
                row += mf_row

                # aux holds the column indices (track ids) ordered by decreasing score.
                indx = row.data.argsort()[::-1]
                aux = row.indices[indx]
                user_playlist = self.urm[k]

                # Top-popular tracks are appended as a fallback at the end of the ranking.
                aux = np.concatenate((aux, self.top_pop_songs), axis=None)
                top_songs = filter_seen(aux, user_playlist)
                if filter_top_pop:
                    top_songs = filter_seen_array(top_songs, self.top_pop_songs)
                top_songs = top_songs[:self.at]

                # Report a short list against the requested length, not a literal 10.
                if len(top_songs) < self.at:
                    print("Francisco was right once")

                final_prediction[k] = ' '.join(str(e) for e in top_songs)
            except IndexError:
                print("I don't have a value in the test_data")

            if (counter % 1000) == 0:
                print("Playlist num", counter, "/10000")

        df = pd.DataFrame(list(final_prediction.items()), columns=['playlist_id', 'track_ids'])
        return df
class HybridRS:
    """Hybrid recommender that sums weighted estimated ratings of five models.

    The components are a content-based model, item-item and user-user
    collaborative models, SLIM BPR and SLIM ElasticNet.
    """

    # Class-level placeholder; it is not read anywhere inside this class.
    train_data = pd.DataFrame()

    def __init__(self, tracks_data, at=10, k_cbf=10, shrinkage_cbf=10, k_i_i=700, shrinkage_i_i=200,
                 k_u_u=200, shrinkage_u_u=50, similarity='cosine', tf_idf=True):
        """Keep the hyper-parameters and instantiate the neighbourhood models.

        ``similarity`` is only stored; ``tf_idf`` is forwarded to all three
        recommenders built here.
        """
        self.k_cbf = k_cbf
        self.k_i_i = k_i_i
        self.k_u_u = k_u_u
        self.at = at
        self.shrinkage_cbf = shrinkage_cbf
        self.shrinkage_i_i = shrinkage_i_i
        self.shrinkage_u_u = shrinkage_u_u
        self.similarity = similarity
        self.tf_idf = tf_idf

        self.cbf_recommender = CbfRS(
            tracks_data, self.at, self.k_cbf, self.shrinkage_cbf, tf_idf=self.tf_idf)
        self.col_i_i_recommender = ColBfIIRS(
            self.at, self.k_i_i, self.shrinkage_i_i, tf_idf=self.tf_idf)
        self.col_u_u_recommender = ColBfUURS(
            self.at, self.k_u_u, self.shrinkage_u_u, tf_idf=self.tf_idf)

    def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK_bpr=200, l1_ratio=0.1,
            topK_elasticNet=300, alpha_elasticNet=0.0002, sgd_mode='sgd'):
        """Fit all five component recommenders on ``train_data``.

        ``lambda_i``, ``lambda_j``, ``topK_bpr`` (as ``topK``) and ``sgd_mode`` go
        to SLIM BPR; ``l1_ratio``, ``topK_elasticNet`` (as ``topK``) and
        ``alpha_elasticNet`` (as ``alpha``) go to SLIM ElasticNet.
        """
        print('Fitting...')

        self.urm = buildURMMatrix(train_data)

        # 20 most frequent tracks, used as a fallback tail at recommendation time.
        track_frequencies = train_data['track_id'].value_counts()
        self.top_pop_songs = track_frequencies.head(20).index.values

        self.col_i_i_recommender.fit(train_data)
        self.col_u_u_recommender.fit(train_data)
        self.cbf_recommender.fit(train_data)

        bpr_settings = dict(lambda_i=lambda_i, lambda_j=lambda_j,
                            topK=topK_bpr, sgd_mode=sgd_mode)
        self.slim_recommender = SLIM_BPR_Cython(train_data)
        self.slim_recommender.fit(**bpr_settings)

        elastic_net_settings = dict(l1_ratio=l1_ratio, topK=topK_elasticNet,
                                    alpha=alpha_elasticNet)
        self.slim_elasticNet_recommender = SLIMElasticNetRecommender(train_data)
        self.slim_elasticNet_recommender.fit(**elastic_net_settings)

    def recommend(self, playlist_ids, alpha=0.1, beta=1, gamma=1, delta=2, omega=30,
                  filter_top_pop=False):
        """Build the top ``self.at`` track list for every playlist id.

        ``alpha``, ``beta``, ``gamma``, ``delta`` and ``omega`` weight the
        user-user, item-item, content-based, SLIM BPR and SLIM ElasticNet
        estimated ratings respectively. ``filter_top_pop`` is only echoed in the
        log line; no filtering depends on it in this class.

        Returns a DataFrame with columns ``playlist_id`` and ``track_ids`` (a
        space-separated string). Ids raising ``IndexError`` are logged and skipped.
        """
        print("Recommending... Am I filtering top_top songs?", filter_top_pop)

        # Estimated ratings are fetched in a fixed order before being blended.
        cbf_scores = self.cbf_recommender.get_estimated_ratings()
        item_item_scores = self.col_i_i_recommender.get_estimated_ratings()
        user_user_scores = self.col_u_u_recommender.get_estimated_ratings()
        bpr_scores = self.slim_recommender.get_estimated_ratings()
        elastic_net_scores = self.slim_elasticNet_recommender.get_estimated_ratings()

        blended_scores = (
            user_user_scores.multiply(alpha)
            + item_item_scores.multiply(beta)
            + cbf_scores.multiply(gamma)
            + bpr_scores.multiply(delta)
            + elastic_net_scores.multiply(omega)
        )

        predictions = {}
        for position, playlist in enumerate(playlist_ids):
            try:
                scores = blended_scores[playlist]
                # Stored column indices ordered by decreasing score.
                ranking = scores.indices[scores.data.argsort()[::-1]]
                already_in_playlist = self.urm[playlist]
                # Popular tracks are appended as a fallback tail of the ranking.
                candidates = np.concatenate((ranking, self.top_pop_songs), axis=None)
                chosen = filter_seen(candidates, already_in_playlist)[:self.at]
                predictions[playlist] = ' '.join(map(str, chosen))
            except IndexError:
                print("I don't have a value in the test_data")

            if position % 1000 == 0:
                print("Playlist num", position, "/10000")

        return pd.DataFrame(list(predictions.items()), columns=['playlist_id', 'track_ids'])
# Fit several stand-alone recommenders on the training data and collect their
# predictions for the target playlists. Each ``map_*`` list starts empty here;
# presumably it is filled per user group further down — TODO confirm.

# Content-based recommender.
rs_content = CbfRS(tracks_data, 10, 10, 10, tf_idf=True)
rs_content.fit(train_data)
predictions_content = rs_content.recommend(target_data['playlist_id'])
map_content_based = []

# PureSVD recommender.
rs_pureSVD = PureSVDRecommender(train_data)
rs_pureSVD.fit()
predictions_pureSVD = rs_pureSVD.recommend(target_data['playlist_id'])
map_pureSVD = []

# MfNnz recommender.
rs_mf_skl = MfNnz(train_data)
rs_mf_skl.fit()
prediction_mf_skl = rs_mf_skl.recommend(target_data['playlist_id'])
map_mf_sl = []

# SLIM BPR recommender; its fit() additionally receives the target playlist ids.
rs_slimBPR = SLIM_BPR_Cython(train_data)  # , URM_validation=test_data)
rs_slimBPR.fit(playlist_ids=target_data['playlist_id'])
prediction_slimBPR = rs_slimBPR.recommend(target_data['playlist_id'])
map_slimBPR = []

# SLIM ElasticNet recommender.
rs_slimEN = SLIMElasticNetRecommender(train_data)
rs_slimEN.fit(l1_ratio=0.25, topK=300)
prediction_slimEN = rs_slimEN.recommend(target_data['playlist_id'])
map_slimEN = []

# Walk over five consecutive slices of ``sorted_users``, each ``block_size`` long
# (the last one is clipped to ``len(profile_length)``).
# NOTE(review): ``block_size``, ``sorted_users`` and ``profile_length`` are defined
# outside this fragment, and the loop body may continue past what is visible here.
for group_id in range(0, 5):
    start_pos = group_id * block_size
    end_pos = min((group_id + 1) * block_size, len(profile_length))
    users_in_group = sorted_users[start_pos:end_pos]
    # Profile lengths of the users that belong to the current group.
    users_in_group_p_len = profile_length[users_in_group]
class HybridRS:
    """Hybrid recommender built on a weighted sum of similarity matrices.

    Six item-similarity matrices (content-based, item-item, P3alpha, RP3beta,
    SLIM BPR, SLIM ElasticNet) are added together and multiplied by the URM.
    User-user estimated ratings and PureSVD scores are then added on top.
    """

    # Class-level placeholder; it is not read anywhere inside this class.
    train_data = pd.DataFrame()

    def __init__(self, tracks_data, at=10, k_cbf=10, shrinkage_cbf=10, k_i_i=700, shrinkage_i_i=200,
                 k_u_u=200, shrinkage_u_u=50, similarity='cosine', tf_idf=True, bm_25=False):
        """Keep the hyper-parameters and instantiate the neighbourhood models.

        ``similarity`` is only stored and ``bm_25`` is accepted but not used in
        this class; ``tf_idf`` is forwarded to all three recommenders built here.
        """
        self.k_cbf = k_cbf
        self.k_i_i = k_i_i
        self.k_u_u = k_u_u
        self.at = at
        self.shrinkage_cbf = shrinkage_cbf
        self.shrinkage_i_i = shrinkage_i_i
        self.shrinkage_u_u = shrinkage_u_u
        self.similarity = similarity
        self.tf_idf = tf_idf

        self.cbf_recommender = CbfRS(
            tracks_data, self.at, self.k_cbf, self.shrinkage_cbf, tf_idf=self.tf_idf)
        self.col_i_i_recommender = ColBfIIRS(
            self.at, self.k_i_i, self.shrinkage_i_i, tf_idf=self.tf_idf)
        self.col_u_u_recommender = ColBfUURS(
            self.at, self.k_u_u, self.shrinkage_u_u, tf_idf=self.tf_idf)

    def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK_bpr=200, l1_ratio=0.1,
            topK_elasticNet=300, alpha_elasticNet=0.0002, sgd_mode='sgd'):
        """Fit every component recommender on ``train_data``.

        ``lambda_i``, ``lambda_j``, ``topK_bpr`` (as ``topK``) and ``sgd_mode`` go
        to SLIM BPR; ``l1_ratio``, ``topK_elasticNet`` (as ``topK``) and
        ``alpha_elasticNet`` (as ``alpha``) go to SLIM ElasticNet. PureSVD,
        P3alpha and RP3beta are fitted with their own defaults.
        """
        print('Fitting...')

        self.urm = buildURMMatrix(train_data)

        # 20 most frequent tracks, used as a fallback tail at recommendation time.
        track_frequencies = train_data['track_id'].value_counts()
        self.top_pop_songs = track_frequencies.head(20).index.values

        self.col_i_i_recommender.fit(train_data)
        self.col_u_u_recommender.fit(train_data)
        self.cbf_recommender.fit(train_data)

        self.pureSVD = PureSVDRecommender(train_data)
        self.pureSVD.fit()

        self.p3alpha = P3alphaRecommender(train_data)
        self.p3alpha.fit()

        self.rp3beta = RP3betaRecommender(train_data)
        self.rp3beta.fit()

        bpr_settings = dict(lambda_i=lambda_i, lambda_j=lambda_j,
                            topK=topK_bpr, sgd_mode=sgd_mode)
        self.slim_recommender = SLIM_BPR_Cython(train_data)
        self.slim_recommender.fit(**bpr_settings)

        elastic_net_settings = dict(l1_ratio=l1_ratio, topK=topK_elasticNet,
                                    alpha=alpha_elasticNet)
        self.slim_elasticNet_recommender = SLIMElasticNetRecommender(train_data)
        self.slim_elasticNet_recommender.fit(**elastic_net_settings)

    def recommend(self, playlist_ids, alpha=0.2, beta=10, gamma=1, delta=2, omega=30, eta=10,
                  theta=30, sigma=1, filter_top_pop=False):
        """Build the top ``self.at`` track list for every playlist id.

        Weights: ``gamma`` content-based, ``beta`` item-item, ``theta`` P3alpha,
        ``sigma`` RP3beta, ``delta`` SLIM BPR and ``omega`` SLIM ElasticNet are
        each passed to the model's ``get_sym_matrix``. ``alpha`` scales the
        user-user estimated ratings and ``eta`` the PureSVD score row.
        ``filter_top_pop`` is only echoed in the log line.

        Returns a DataFrame with columns ``playlist_id`` and ``track_ids`` (a
        space-separated string). Ids raising ``IndexError`` are logged and skipped.
        """
        print("Recommending... Am I filtering top_top songs?", filter_top_pop)

        # Similarity matrices are requested in a fixed order, each with its weight.
        content_sim = self.cbf_recommender.get_sym_matrix(gamma)
        item_item_sim = self.col_i_i_recommender.get_sym_matrix(beta)
        p3alpha_sim = self.p3alpha.get_sym_matrix(theta)
        rp3beta_sim = self.rp3beta.get_sym_matrix(sigma)
        bpr_sim = self.slim_recommender.get_sym_matrix(delta)
        elastic_net_sim = self.slim_elasticNet_recommender.get_sym_matrix(omega)

        combined_sim = (content_sim + item_item_sim + p3alpha_sim
                        + bpr_sim + elastic_net_sim + rp3beta_sim)

        # Estimated ratings derived from the combined similarity.
        similarity_scores = self.urm * combined_sim
        user_user_scores = self.col_u_u_recommender.get_estimated_ratings()
        blended_scores = user_user_scores.multiply(alpha) + similarity_scores

        predictions = {}
        for playlist in tqdm(playlist_ids):
            try:
                # Dense score row with the weighted PureSVD scores added on top.
                svd_scores = self.pureSVD.compute_score_SVD(playlist) * eta
                dense_scores = blended_scores[playlist].toarray()[0] + svd_scores
                ranking = dense_scores.argsort()[::-1]
                already_in_playlist = self.urm[playlist]
                # Popular tracks are appended as a fallback tail of the ranking.
                candidates = np.concatenate((ranking, self.top_pop_songs), axis=None)
                chosen = filter_seen(candidates, already_in_playlist)[:self.at]
                predictions[playlist] = ' '.join(map(str, chosen))
            except IndexError:
                print("I don't have a value in the test_data")

        return pd.DataFrame(list(predictions.items()), columns=['playlist_id', 'track_ids'])
class HybridRS:
    """Hybrid recommender combining SLIM BPR and SLIM ElasticNet estimated ratings."""

    # Class-level placeholder; it is not read anywhere inside this class.
    train_data = pd.DataFrame()

    def __init__(self, at=10):
        """Store ``at``, the number of tracks to recommend per playlist."""
        print("Hybrid Slim(s) Initialized")
        self.at = at

    def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK_bpr=200, l1_ratio=0.1,
            topK_elasticNet=200, alpha_elasticNet=0.0001, sgd_mode='sgd'):
        """Create and fit both SLIM models on ``train_data``.

        Args:
            train_data: Training interactions; must expose a ``track_id`` column.
            lambda_i: Forwarded to ``SLIM_BPR_Cython.fit`` as ``lambda_i``.
            lambda_j: Forwarded to ``SLIM_BPR_Cython.fit`` as ``lambda_j``.
            topK_bpr: Forwarded to ``SLIM_BPR_Cython.fit`` as ``topK``.
            l1_ratio: Forwarded to ``SLIMElasticNetRecommender.fit`` as ``l1_ratio``.
            topK_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit`` as ``topK``.
            alpha_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit`` as ``alpha``.
            sgd_mode: Forwarded to ``SLIM_BPR_Cython.fit`` as ``sgd_mode``.
        """
        print('Fitting...')
        self.urm = buildURMMatrix(train_data)
        # The 20 most frequent tracks are appended to every candidate list later on.
        self.top_pop_songs = train_data['track_id'].value_counts().head(20).index.values
        self.slim_bpr_recommender = SLIM_BPR_Cython(train_data)
        self.slim_bpr_recommender.fit(lambda_i=lambda_i, lambda_j=lambda_j, topK=topK_bpr, sgd_mode=sgd_mode)
        self.slim_elasticNet_recommender = SLIMElasticNetRecommender(train_data)
        self.slim_elasticNet_recommender.fit(l1_ratio=l1_ratio, topK=topK_elasticNet, alpha=alpha_elasticNet)

    def recommend(self, playlist_ids, omega=60, filter_top_pop=False):
        """Recommend up to ``self.at`` tracks for each playlist id.

        Args:
            playlist_ids: Iterable of playlist ids used to index the rating matrices.
            omega: Weight applied to the SLIM ElasticNet estimated ratings before
                they are added to the SLIM BPR ones.
            filter_top_pop: Only echoed in the log line; no filtering depends on it.

        Returns:
            A DataFrame with columns ``playlist_id`` and ``track_ids``; ``track_ids``
            is a space-separated string. Playlists that raise ``IndexError`` are
            reported on stdout and left out of the result.
        """
        print("Recommending... Am I filtering top_top songs?", filter_top_pop)
        final_prediction = {}

        e_r_slim_bpr = self.slim_bpr_recommender.get_estimated_ratings()
        e_r_slim_elasticNet = self.slim_elasticNet_recommender.get_estimated_ratings()
        estimated_ratings_final = e_r_slim_bpr + e_r_slim_elasticNet.multiply(omega)

        for counter, k in enumerate(playlist_ids):
            try:
                row = estimated_ratings_final[k]
                # aux holds the column indices (track ids) ordered by decreasing score.
                indx = row.data.argsort()[::-1]
                aux = row.indices[indx]
                user_playlist = self.urm[k]
                # Top-popular tracks are appended as a fallback at the end of the ranking.
                aux = np.concatenate((aux, self.top_pop_songs), axis=None)
                # Honour the configured list length instead of a hard-coded 10.
                top_songs = filter_seen(aux, user_playlist)[:self.at]
                final_prediction[k] = ' '.join(str(e) for e in top_songs)
            except IndexError:
                print("I don't have a value in the test_data")

            if (counter % 1000) == 0:
                print("Playlist num", counter, "/10000")

        df = pd.DataFrame(list(final_prediction.items()), columns=['playlist_id', 'track_ids'])
        return df