Example #1
0
 def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK_bpr=200,
         l1_ratio=0.1, topK_elasticNet=300, alpha_elasticNet=0.0002,
         sgd_mode='sgd'):
     """Train every component model of the hybrid on ``train_data``.

     The steps run in a fixed order: build the URM, cache the most
     frequent tracks, fit the three recommenders that already exist on
     the instance, create and fit the two SLIM models, and finally fit
     the ALS recommender on the URM.

     Args:
         train_data: Interaction table indexable by the ``'track_id'``
             column (presumably a pandas DataFrame -- confirm with the
             loader).
         lambda_i: Forwarded to ``SLIM_BPR_Cython.fit``.
         lambda_j: Forwarded to ``SLIM_BPR_Cython.fit``.
         topK_bpr: Forwarded to ``SLIM_BPR_Cython.fit`` as ``topK``.
         l1_ratio: Forwarded to ``SLIMElasticNetRecommender.fit``.
         topK_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit``
             as ``topK``.
         alpha_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit``
             as ``alpha``.
         sgd_mode: Forwarded to ``SLIM_BPR_Cython.fit``.
     """
     print('Fitting...')
     self.urm = buildURMMatrix(train_data)

     # value_counts() orders by descending frequency, so head(20) keeps
     # the twenty most frequent track ids.
     track_frequencies = train_data['track_id'].value_counts()
     self.top_pop_songs = track_frequencies.head(20).index.values

     # These three attributes are not created here; they must already be
     # set on the instance before fit() is called.
     self.col_i_i_recommender.fit(train_data)
     self.col_u_u_recommender.fit(train_data)
     self.cbf_recommender.fit(train_data)

     # The SLIM models are (re)built from the training data on every fit.
     bpr_settings = dict(lambda_i=lambda_i, lambda_j=lambda_j,
                         topK=topK_bpr, sgd_mode=sgd_mode)
     self.slim_recommender = SLIM_BPR_Cython(train_data)
     self.slim_recommender.fit(**bpr_settings)

     elastic_net_settings = dict(l1_ratio=l1_ratio, topK=topK_elasticNet,
                                 alpha=alpha_elasticNet)
     self.slim_elasticNet_recommender = SLIMElasticNetRecommender(train_data)
     self.slim_elasticNet_recommender.fit(**elastic_net_settings)

     # Unlike the other models, ALS is fitted on the URM built above, not
     # on the raw interaction table.
     self.als_recommender.fit(self.urm)
Example #2
0
    def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK=200, sgd_mode='sgd'):
        """Train the component models of the hybrid on ``train_data``.

        Args:
            train_data: Interaction table indexable by the ``'track_id'``
                column (presumably a pandas DataFrame -- confirm with the
                loader).
            lambda_i: Forwarded to ``SLIM_BPR_Cython.fit``.
            lambda_j: Forwarded to ``SLIM_BPR_Cython.fit``.
            topK: Forwarded to ``SLIM_BPR_Cython.fit``.
            sgd_mode: Forwarded to ``SLIM_BPR_Cython.fit``.
        """
        print('Fitting...')

        self.urm = buildURMMatrix(train_data)

        # value_counts() orders by descending frequency, so head(20) keeps
        # the twenty most frequent track ids.
        track_frequencies = train_data['track_id'].value_counts()
        self.top_pop_songs = track_frequencies.head(20).index.values

        # Both attributes are expected to exist on the instance already;
        # this method does not create them.
        self.col_i_i_recommender.fit(train_data)
        self.cbf_recommender.fit(train_data)

        # The SLIM-BPR model is (re)built from the training data on every fit.
        bpr_settings = dict(lambda_i=lambda_i, lambda_j=lambda_j,
                            topK=topK, sgd_mode=sgd_mode)
        self.slim_recommender = SLIM_BPR_Cython(train_data)
        self.slim_recommender.fit(**bpr_settings)
Example #3
0
import time

from parameterTunning.AbstractClassSearch import DictionaryKeys
from loader.loader import save_dataframe, train_data, target_data, test_data, tracks_data
from utils.auxUtils import Evaluator, buildURMMatrix, filter_seen
import pandas as pd

from slimRS.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
import matplotlib.pyplot as plt
from parameterTunning.GridSearch import GridSearch
from sklearn.model_selection import GridSearchCV

# Sparse user-rating matrices for the train and test splits.
URM_train = buildURMMatrix(train_data)
URM_test = buildURMMatrix(test_data)

rs = SLIM_BPR_Cython(train_data)

# Hyper-parameter grid explored by the search: 4 * 4 * 3 = 48 combinations.
grid_param = {
    'lambda_i': [1e-1, 1e-2, 1e-3, 1e-4],
    'lambda_j': [1e-1, 1e-2, 1e-3, 1e-4],
    'topK': [300, 400, 500],
}

evaluator = Evaluator()


def score_on_test_split(estimator, X, y=None):
    """Score a fitted candidate on the held-out ``test_data`` split.

    scikit-learn invokes a callable ``scoring`` as ``scorer(estimator, X, y)``
    once per fitted candidate.  ``X`` and ``y`` (the cross-validation fold)
    are deliberately ignored: the project evaluates recommendations for the
    target playlists against ``test_data`` instead.

    NOTE(review): assumes ``Evaluator.evaluate`` returns a single number
    where higher is better -- confirm against utils.auxUtils.
    """
    predictions = estimator.recommend(target_data["playlist_id"])
    return evaluator.evaluate(predictions, test_data)


# ``scoring`` has to be a callable (or a scorer name).  Passing the value of
# ``evaluator.evaluate(...)`` directly would score the still-unfitted ``rs``
# exactly once, at construction time, instead of scoring every candidate.
gd_sr = GridSearchCV(estimator=rs,
                     param_grid=grid_param,
                     scoring=score_on_test_split,
                     n_jobs=2)

gd_sr.fit(URM_train)
Example #4
0
class HybridRS:
    """Hybrid recommender that blends several models' estimated ratings.

    ``recommend`` forms a weighted sum of the estimated-rating matrices
    of a content-based model, an item-item and a user-user collaborative
    model, SLIM-BPR and SLIM-ElasticNet, and adds a weighted PureSVD
    score row per playlist before ranking.
    """

    # Class-level placeholder; none of the methods below read it.
    train_data = pd.DataFrame()

    def __init__(self, tracks_data, at=10, k_cbf=35, shrinkage_cbf=150, k_i_i=700, shrinkage_i_i=200,
                 k_u_u=200, shrinkage_u_u=50, similarity='cosine', tf_idf=True):
        """Store the hyper-parameters and build the neighbourhood models.

        Args:
            tracks_data: Passed as the first argument to ``CbfRS``.
            at: Number of tracks recommended per playlist.
            k_cbf: Passed to ``CbfRS`` together with ``shrinkage_cbf``.
            shrinkage_cbf: See ``k_cbf``.
            k_i_i: Passed to ``ColBfIIRS`` together with ``shrinkage_i_i``.
            shrinkage_i_i: See ``k_i_i``.
            k_u_u: Passed to ``ColBfUURS`` together with ``shrinkage_u_u``.
            shrinkage_u_u: See ``k_u_u``.
            similarity: Stored on the instance; not forwarded to any
                sub-model in this class.
            tf_idf: Forwarded to the two collaborative models only.
        """
        self.k_cbf = k_cbf
        self.k_i_i = k_i_i
        self.k_u_u = k_u_u
        self.at = at
        self.shrinkage_cbf = shrinkage_cbf
        self.shrinkage_i_i = shrinkage_i_i
        self.shrinkage_u_u = shrinkage_u_u
        self.similarity = similarity
        self.tf_idf = tf_idf
        self.cbf_recommender = CbfRS(tracks_data, self.at, self.k_cbf, self.shrinkage_cbf)
        self.col_i_i_recommender = ColBfIIRS(self.at, self.k_i_i, self.shrinkage_i_i, tf_idf=self.tf_idf)
        self.col_u_u_recommender = ColBfUURS(self.at, self.k_u_u, self.shrinkage_u_u, tf_idf=self.tf_idf)

    def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK_bpr=200, l1_ratio=0.1,
            topK_elasticNet=300, alpha_elasticNet=0.0002, sgd_mode='sgd'):
        """Fit every sub-model on ``train_data``.

        Args:
            train_data: Interaction table indexable by the ``'track_id'``
                column (presumably a pandas DataFrame -- confirm with the
                loader).
            lambda_i: Forwarded to ``SLIM_BPR_Cython.fit``.
            lambda_j: Forwarded to ``SLIM_BPR_Cython.fit``.
            topK_bpr: Forwarded to ``SLIM_BPR_Cython.fit`` as ``topK``.
            l1_ratio: Forwarded to ``SLIMElasticNetRecommender.fit``.
            topK_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit``
                as ``topK``.
            alpha_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit``
                as ``alpha``.
            sgd_mode: Forwarded to ``SLIM_BPR_Cython.fit``.
        """
        print('Fitting...')
        self.urm = buildURMMatrix(train_data)
        # value_counts() sorts by descending frequency: the 20 most frequent
        # track ids, used later to pad short recommendation lists.
        self.top_pop_songs = train_data['track_id'].value_counts().head(20).index.values
        self.col_i_i_recommender.fit(train_data)
        self.col_u_u_recommender.fit(train_data)
        self.cbf_recommender.fit(train_data)
        self.slim_recommender = SLIM_BPR_Cython(train_data)
        self.slim_recommender.fit(lambda_i=lambda_i, lambda_j=lambda_j, topK=topK_bpr, sgd_mode=sgd_mode)
        self.slim_elasticNet_recommender = SLIMElasticNetRecommender(train_data)
        self.slim_elasticNet_recommender.fit(l1_ratio=l1_ratio, topK=topK_elasticNet, alpha=alpha_elasticNet)

        self.pureSVD = PureSVDRecommender(train_data)
        self.pureSVD.fit()

    def recommend(self, playlist_ids, alpha=0.1, beta=1, gamma=1, delta=2, omega=30, eta=0.8, filter_top_pop=False):
        """Recommend up to ``self.at`` tracks for each playlist.

        Args:
            playlist_ids: Iterable of playlist ids, used as row indices
                into the rating matrices and the URM.
            alpha: Weight of the user-user collaborative ratings.
            beta: Weight of the item-item collaborative ratings.
            gamma: Weight of the content-based ratings.
            delta: Weight of the SLIM-BPR ratings.
            omega: Weight of the SLIM-ElasticNet ratings.
            eta: Weight of the per-playlist PureSVD score row.
            filter_top_pop: When true, the most frequent tracks are removed
                from each list with ``filter_seen_array``.

        Returns:
            A DataFrame with columns ``playlist_id`` and ``track_ids``; the
            latter is a space-separated string of track ids.  Playlists
            whose processing raised ``IndexError`` are left out.
        """
        print("Recommending... Am I filtering top_top songs?", filter_top_pop)

        final_prediction = {}

        # e_r_ stands for estimated rating
        e_r_cbf = self.cbf_recommender.get_estimated_ratings()
        e_r_col_i_i = self.col_i_i_recommender.get_estimated_ratings()
        e_r_col_u_u = self.col_u_u_recommender.get_estimated_ratings()
        e_r_slim_bpr = self.slim_recommender.get_estimated_ratings()
        e_r_slim_elasticNet = self.slim_elasticNet_recommender.get_estimated_ratings()

        # Weighted blend, summed left to right.
        estimated_ratings_final = (e_r_col_u_u.multiply(alpha)
                                   + e_r_col_i_i.multiply(beta)
                                   + e_r_cbf.multiply(gamma)
                                   + e_r_slim_bpr.multiply(delta)
                                   + e_r_slim_elasticNet.multiply(omega))

        print('after sum..')
        for counter, k in enumerate(playlist_ids):
            try:
                row = estimated_ratings_final[k]
                # Add the weighted PureSVD scores for this playlist.
                mf_row = sparse.csr_matrix(self.pureSVD.compute_score_SVD(k)).multiply(eta)
                row += mf_row
                # Column indices ordered by decreasing blended score.
                indx = row.data.argsort()[::-1]
                aux = row.indices[indx]
                user_playlist = self.urm[k]

                # Append the most frequent tracks as a fallback tail.
                aux = np.concatenate((aux, self.top_pop_songs), axis=None)

                top_songs = filter_seen(aux, user_playlist)
                if filter_top_pop:
                    top_songs = filter_seen_array(top_songs, self.top_pop_songs)
                top_songs = top_songs[:self.at]

                # Compare against the configured list length rather than a
                # literal 10, so the warning stays meaningful when at != 10.
                if len(top_songs) < self.at:
                    print("Francisco was right once")

                final_prediction[k] = ' '.join(str(e) for e in top_songs)
            except IndexError:
                print("I don't have a value in the test_data")

            # NOTE(review): the "/10000" total is hard-coded in the message.
            if (counter % 1000) == 0:
                print("Playlist num", counter, "/10000")

        df = pd.DataFrame(list(final_prediction.items()), columns=['playlist_id', 'track_ids'])
        return df
Example #5
0
class HybridRS:
    """Hybrid recommender built on a weighted sum of estimated ratings.

    Five models contribute: content-based, item-item and user-user
    collaborative filtering, SLIM-BPR and SLIM-ElasticNet.
    """

    # Class-level placeholder; none of the methods below read it.
    train_data = pd.DataFrame()

    def __init__(self, tracks_data, at=10, k_cbf=10, shrinkage_cbf=10,
                 k_i_i=700, shrinkage_i_i=200, k_u_u=200, shrinkage_u_u=50,
                 similarity='cosine', tf_idf=True):
        """Store the hyper-parameters and build the neighbourhood models.

        Args:
            tracks_data: Passed as the first argument to ``CbfRS``.
            at: Number of tracks recommended per playlist.
            k_cbf: Passed to ``CbfRS`` together with ``shrinkage_cbf``.
            shrinkage_cbf: See ``k_cbf``.
            k_i_i: Passed to ``ColBfIIRS`` together with ``shrinkage_i_i``.
            shrinkage_i_i: See ``k_i_i``.
            k_u_u: Passed to ``ColBfUURS`` together with ``shrinkage_u_u``.
            shrinkage_u_u: See ``k_u_u``.
            similarity: Stored on the instance; not forwarded to any
                sub-model in this class.
            tf_idf: Forwarded to all three neighbourhood models.
        """
        self.k_cbf = k_cbf
        self.k_i_i = k_i_i
        self.k_u_u = k_u_u
        self.at = at
        self.shrinkage_cbf = shrinkage_cbf
        self.shrinkage_i_i = shrinkage_i_i
        self.shrinkage_u_u = shrinkage_u_u
        self.similarity = similarity
        self.tf_idf = tf_idf
        self.cbf_recommender = CbfRS(
            tracks_data, self.at, self.k_cbf, self.shrinkage_cbf, tf_idf=self.tf_idf)
        self.col_i_i_recommender = ColBfIIRS(
            self.at, self.k_i_i, self.shrinkage_i_i, tf_idf=self.tf_idf)
        self.col_u_u_recommender = ColBfUURS(
            self.at, self.k_u_u, self.shrinkage_u_u, tf_idf=self.tf_idf)

    def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK_bpr=200,
            l1_ratio=0.1, topK_elasticNet=300, alpha_elasticNet=0.0002,
            sgd_mode='sgd'):
        """Fit every sub-model on ``train_data``.

        Args:
            train_data: Interaction table indexable by the ``'track_id'``
                column (presumably a pandas DataFrame -- confirm with the
                loader).
            lambda_i: Forwarded to ``SLIM_BPR_Cython.fit``.
            lambda_j: Forwarded to ``SLIM_BPR_Cython.fit``.
            topK_bpr: Forwarded to ``SLIM_BPR_Cython.fit`` as ``topK``.
            l1_ratio: Forwarded to ``SLIMElasticNetRecommender.fit``.
            topK_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit``
                as ``topK``.
            alpha_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit``
                as ``alpha``.
            sgd_mode: Forwarded to ``SLIM_BPR_Cython.fit``.
        """
        print('Fitting...')
        self.urm = buildURMMatrix(train_data)

        # value_counts() orders by descending frequency, so head(20) keeps
        # the twenty most frequent track ids.
        track_frequencies = train_data['track_id'].value_counts()
        self.top_pop_songs = track_frequencies.head(20).index.values

        self.col_i_i_recommender.fit(train_data)
        self.col_u_u_recommender.fit(train_data)
        self.cbf_recommender.fit(train_data)

        bpr_settings = dict(lambda_i=lambda_i, lambda_j=lambda_j,
                            topK=topK_bpr, sgd_mode=sgd_mode)
        self.slim_recommender = SLIM_BPR_Cython(train_data)
        self.slim_recommender.fit(**bpr_settings)

        elastic_net_settings = dict(l1_ratio=l1_ratio, topK=topK_elasticNet,
                                    alpha=alpha_elasticNet)
        self.slim_elasticNet_recommender = SLIMElasticNetRecommender(train_data)
        self.slim_elasticNet_recommender.fit(**elastic_net_settings)

    def recommend(self, playlist_ids, alpha=0.1, beta=1, gamma=1, delta=2,
                  omega=30, filter_top_pop=False):
        """Recommend up to ``self.at`` tracks for each playlist.

        Args:
            playlist_ids: Iterable of playlist ids, used as row indices
                into the blended rating matrix and the URM.
            alpha: Weight of the user-user collaborative ratings.
            beta: Weight of the item-item collaborative ratings.
            gamma: Weight of the content-based ratings.
            delta: Weight of the SLIM-BPR ratings.
            omega: Weight of the SLIM-ElasticNet ratings.
            filter_top_pop: Only echoed in the first log line; this method
                applies no popularity filtering.

        Returns:
            A DataFrame with columns ``playlist_id`` and ``track_ids``; the
            latter is a space-separated string of track ids.  Playlists
            whose processing raised ``IndexError`` are left out.
        """
        print("Recommending... Am I filtering top_top songs?", filter_top_pop)

        final_prediction = {}

        # One estimated-rating matrix per model, fetched in a fixed order.
        cbf_ratings = self.cbf_recommender.get_estimated_ratings()
        item_item_ratings = self.col_i_i_recommender.get_estimated_ratings()
        user_user_ratings = self.col_u_u_recommender.get_estimated_ratings()
        bpr_ratings = self.slim_recommender.get_estimated_ratings()
        elastic_net_ratings = self.slim_elasticNet_recommender.get_estimated_ratings()

        # Weighted blend, accumulated left to right.
        neighbourhood_blend = (user_user_ratings.multiply(alpha)
                               + item_item_ratings.multiply(beta)
                               + cbf_ratings.multiply(gamma))
        with_bpr = neighbourhood_blend + bpr_ratings.multiply(delta)
        blended = with_bpr + elastic_net_ratings.multiply(omega)

        for position, playlist_id in enumerate(playlist_ids):
            try:
                scores = blended[playlist_id]
                # Column indices ordered by decreasing blended score.
                by_score = scores.data.argsort()[::-1]
                ranked = scores.indices[by_score]
                already_in_playlist = self.urm[playlist_id]

                # The most frequent tracks are appended as a fallback tail.
                candidates = np.concatenate((ranked, self.top_pop_songs), axis=None)
                picks = filter_seen(candidates, already_in_playlist)[:self.at]

                final_prediction[playlist_id] = ' '.join(str(track) for track in picks)
            except IndexError:
                print("I don't have a value in the test_data")

            if position % 1000 == 0:
                print("Playlist num", position, "/10000")

        rows = list(final_prediction.items())
        return pd.DataFrame(rows, columns=['playlist_id', 'track_ids'])
Example #6
0
# Train each stand-alone recommender on train_data and precompute its
# recommendations for the target playlists.  Every model gets an empty
# ``map_*`` list next to it; presumably these collect per-group scores
# further down -- that code is not visible here.

# Content-based model.
# NOTE(review): the three positional 10s are presumably at / k / shrinkage --
# confirm against the CbfRS signature.
rs_content = CbfRS(tracks_data, 10, 10, 10, tf_idf=True)
rs_content.fit(train_data)
predictions_content = rs_content.recommend(target_data['playlist_id'])
map_content_based = []

# PureSVD model, fitted with its default arguments.
rs_pureSVD = PureSVDRecommender(train_data)
rs_pureSVD.fit()
predictions_pureSVD = rs_pureSVD.recommend(target_data['playlist_id'])
map_pureSVD = []

# MfNnz model, fitted with its default arguments.
rs_mf_skl = MfNnz(train_data)
rs_mf_skl.fit()
prediction_mf_skl = rs_mf_skl.recommend(target_data['playlist_id'])
map_mf_sl = []

# SLIM-BPR model; the validation-URM argument is left disabled.
rs_slimBPR = SLIM_BPR_Cython(train_data)  # , URM_validation=test_data)
rs_slimBPR.fit(playlist_ids=target_data['playlist_id'])
prediction_slimBPR = rs_slimBPR.recommend(target_data['playlist_id'])
map_slimBPR = []

# SLIM-ElasticNet model.
rs_slimEN = SLIMElasticNetRecommender(train_data)
rs_slimEN.fit(l1_ratio=0.25, topK=300)
prediction_slimEN = rs_slimEN.recommend(target_data['playlist_id'])
map_slimEN = []

# Walk over five consecutive slices of ``sorted_users``, each ``block_size``
# long (the last one is clipped to len(profile_length)).
# NOTE(review): block_size, profile_length and sorted_users are not defined
# in the visible part of this script -- they must come from earlier code.
for group_id in range(0, 5):
    start_pos = group_id * block_size
    end_pos = min((group_id + 1) * block_size, len(profile_length))

    # Users belonging to this slice, and their entries in profile_length.
    users_in_group = sorted_users[start_pos:end_pos]
    users_in_group_p_len = profile_length[users_in_group]
Example #7
0
class HybridRS:
    """Hybrid recommender that blends models at the similarity level.

    Six weighted similarity matrices are summed and multiplied by the
    URM; weighted user-user ratings and a per-playlist PureSVD score row
    are then added before ranking.
    """

    # Class-level placeholder; none of the methods below read it.
    train_data = pd.DataFrame()

    def __init__(self, tracks_data, at=10, k_cbf=10, shrinkage_cbf=10,
                 k_i_i=700, shrinkage_i_i=200, k_u_u=200, shrinkage_u_u=50,
                 similarity='cosine', tf_idf=True, bm_25=False):
        """Store the hyper-parameters and build the neighbourhood models.

        Args:
            tracks_data: Passed as the first argument to ``CbfRS``.
            at: Number of tracks recommended per playlist.
            k_cbf: Passed to ``CbfRS`` together with ``shrinkage_cbf``.
            shrinkage_cbf: See ``k_cbf``.
            k_i_i: Passed to ``ColBfIIRS`` together with ``shrinkage_i_i``.
            shrinkage_i_i: See ``k_i_i``.
            k_u_u: Passed to ``ColBfUURS`` together with ``shrinkage_u_u``.
            shrinkage_u_u: See ``k_u_u``.
            similarity: Stored on the instance; not forwarded to any
                sub-model in this class.
            tf_idf: Forwarded to all three neighbourhood models.
            bm_25: Accepted but neither stored nor used by this class.
        """
        self.k_cbf = k_cbf
        self.k_i_i = k_i_i
        self.k_u_u = k_u_u
        self.at = at
        self.shrinkage_cbf = shrinkage_cbf
        self.shrinkage_i_i = shrinkage_i_i
        self.shrinkage_u_u = shrinkage_u_u
        self.similarity = similarity
        self.tf_idf = tf_idf
        self.cbf_recommender = CbfRS(
            tracks_data, self.at, self.k_cbf, self.shrinkage_cbf, tf_idf=self.tf_idf)
        self.col_i_i_recommender = ColBfIIRS(
            self.at, self.k_i_i, self.shrinkage_i_i, tf_idf=self.tf_idf)
        self.col_u_u_recommender = ColBfUURS(
            self.at, self.k_u_u, self.shrinkage_u_u, tf_idf=self.tf_idf)

    def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK_bpr=200,
            l1_ratio=0.1, topK_elasticNet=300, alpha_elasticNet=0.0002,
            sgd_mode='sgd'):
        """Fit every sub-model on ``train_data``.

        Args:
            train_data: Interaction table indexable by the ``'track_id'``
                column (presumably a pandas DataFrame -- confirm with the
                loader).
            lambda_i: Forwarded to ``SLIM_BPR_Cython.fit``.
            lambda_j: Forwarded to ``SLIM_BPR_Cython.fit``.
            topK_bpr: Forwarded to ``SLIM_BPR_Cython.fit`` as ``topK``.
            l1_ratio: Forwarded to ``SLIMElasticNetRecommender.fit``.
            topK_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit``
                as ``topK``.
            alpha_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit``
                as ``alpha``.
            sgd_mode: Forwarded to ``SLIM_BPR_Cython.fit``.
        """
        print('Fitting...')
        self.urm = buildURMMatrix(train_data)

        # value_counts() orders by descending frequency, so head(20) keeps
        # the twenty most frequent track ids.
        track_frequencies = train_data['track_id'].value_counts()
        self.top_pop_songs = track_frequencies.head(20).index.values

        self.col_i_i_recommender.fit(train_data)
        self.col_u_u_recommender.fit(train_data)
        self.cbf_recommender.fit(train_data)

        # The next three models are fitted with their default arguments.
        self.pureSVD = PureSVDRecommender(train_data)
        self.pureSVD.fit()

        self.p3alpha = P3alphaRecommender(train_data)
        self.p3alpha.fit()

        self.rp3beta = RP3betaRecommender(train_data)
        self.rp3beta.fit()

        bpr_settings = dict(lambda_i=lambda_i, lambda_j=lambda_j,
                            topK=topK_bpr, sgd_mode=sgd_mode)
        self.slim_recommender = SLIM_BPR_Cython(train_data)
        self.slim_recommender.fit(**bpr_settings)

        elastic_net_settings = dict(l1_ratio=l1_ratio, topK=topK_elasticNet,
                                    alpha=alpha_elasticNet)
        self.slim_elasticNet_recommender = SLIMElasticNetRecommender(train_data)
        self.slim_elasticNet_recommender.fit(**elastic_net_settings)

    def recommend(self, playlist_ids, alpha=0.2, beta=10, gamma=1, delta=2,
                  omega=30, eta=10, theta=30, sigma=1, filter_top_pop=False):
        """Recommend up to ``self.at`` tracks for each playlist.

        Args:
            playlist_ids: Iterable of playlist ids, used as row indices
                into the blended rating matrix and the URM.
            alpha: Weight of the user-user collaborative ratings.
            beta: Passed to the item-item model's ``get_sym_matrix``.
            gamma: Passed to the content-based model's ``get_sym_matrix``.
            delta: Passed to the SLIM-BPR model's ``get_sym_matrix``.
            omega: Passed to the SLIM-ElasticNet model's ``get_sym_matrix``.
            eta: Weight of the per-playlist PureSVD score row.
            theta: Passed to the P3alpha model's ``get_sym_matrix``.
            sigma: Passed to the RP3beta model's ``get_sym_matrix``.
            filter_top_pop: Only echoed in the first log line; this method
                applies no popularity filtering.

        Returns:
            A DataFrame with columns ``playlist_id`` and ``track_ids``; the
            latter is a space-separated string of track ids.  Playlists
            whose processing raised ``IndexError`` are left out.
        """
        print("Recommending... Am I filtering top_top songs?", filter_top_pop)

        final_prediction = {}

        # One weighted matrix per model, requested in a fixed order.
        content_sim = self.cbf_recommender.get_sym_matrix(gamma)
        item_item_sim = self.col_i_i_recommender.get_sym_matrix(beta)
        p3alpha_sim = self.p3alpha.get_sym_matrix(theta)
        rp3beta_sim = self.rp3beta.get_sym_matrix(sigma)
        bpr_sim = self.slim_recommender.get_sym_matrix(delta)
        elastic_net_sim = self.slim_elasticNet_recommender.get_sym_matrix(omega)

        # Summation order is kept fixed (RP3beta is added last).
        combined_sim = (content_sim + item_item_sim + p3alpha_sim
                        + bpr_sim + elastic_net_sim + rp3beta_sim)

        # Estimated ratings of the similarity blend: URM times similarity.
        hybrid_ratings = self.urm * combined_sim

        user_user_ratings = self.col_u_u_recommender.get_estimated_ratings()
        blended = user_user_ratings.multiply(alpha) + hybrid_ratings

        for playlist_id in tqdm(playlist_ids):
            try:
                # Dense score row for this playlist plus the weighted
                # PureSVD scores.
                dense_row = blended[playlist_id].toarray()[0]
                svd_row = self.pureSVD.compute_score_SVD(playlist_id) * eta
                scores = dense_row + svd_row

                # Column indices ordered by decreasing score.
                ranked = scores.argsort()[::-1]
                already_in_playlist = self.urm[playlist_id]

                # The most frequent tracks are appended as a fallback tail.
                candidates = np.concatenate((ranked, self.top_pop_songs), axis=None)
                picks = filter_seen(candidates, already_in_playlist)[:self.at]

                final_prediction[playlist_id] = ' '.join(str(track) for track in picks)
            except IndexError:
                print("I don't have a value in the test_data")

        rows = list(final_prediction.items())
        return pd.DataFrame(rows, columns=['playlist_id', 'track_ids'])
Example #8
0
class HybridRS:
    """Hybrid of the two SLIM models (BPR and ElasticNet).

    ``recommend`` adds the SLIM-BPR estimated ratings to the
    ElasticNet ones scaled by ``omega`` and ranks tracks per playlist.
    """

    # Class-level placeholder; none of the methods below read it.
    train_data = pd.DataFrame()

    def __init__(self, at=10):
        """Create the hybrid.

        Args:
            at: Number of tracks recommended per playlist.
        """
        print("Hybrid Slim(s) Initialized")
        self.at = at

    def fit(self,
            train_data,
            lambda_i=0.001,
            lambda_j=0.001,
            topK_bpr=200,
            l1_ratio=0.1,
            topK_elasticNet=200,
            alpha_elasticNet=0.0001,
            sgd_mode='sgd'):
        """Build and fit both SLIM models on ``train_data``.

        Args:
            train_data: Interaction table indexable by the ``'track_id'``
                column (presumably a pandas DataFrame -- confirm with the
                loader).
            lambda_i: Forwarded to ``SLIM_BPR_Cython.fit``.
            lambda_j: Forwarded to ``SLIM_BPR_Cython.fit``.
            topK_bpr: Forwarded to ``SLIM_BPR_Cython.fit`` as ``topK``.
            l1_ratio: Forwarded to ``SLIMElasticNetRecommender.fit``.
            topK_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit``
                as ``topK``.
            alpha_elasticNet: Forwarded to ``SLIMElasticNetRecommender.fit``
                as ``alpha``.
            sgd_mode: Forwarded to ``SLIM_BPR_Cython.fit``.
        """
        print('Fitting...')

        self.urm = buildURMMatrix(train_data)
        # value_counts() sorts by descending frequency: the 20 most frequent
        # track ids, used later to pad short recommendation lists.
        self.top_pop_songs = train_data['track_id'].value_counts().head(
            20).index.values
        self.slim_bpr_recommender = SLIM_BPR_Cython(train_data)
        self.slim_bpr_recommender.fit(lambda_i=lambda_i,
                                      lambda_j=lambda_j,
                                      topK=topK_bpr,
                                      sgd_mode=sgd_mode)
        self.slim_elasticNet_recommender = SLIMElasticNetRecommender(
            train_data)
        self.slim_elasticNet_recommender.fit(l1_ratio=l1_ratio,
                                             topK=topK_elasticNet,
                                             alpha=alpha_elasticNet)

    def recommend(self, playlist_ids, omega=60, filter_top_pop=False):
        """Recommend up to ``self.at`` tracks for each playlist.

        Args:
            playlist_ids: Iterable of playlist ids, used as row indices
                into the blended rating matrix and the URM.
            omega: Weight of the SLIM-ElasticNet ratings; the SLIM-BPR
                ratings are added unweighted.
            filter_top_pop: Only echoed in the first log line; this method
                applies no popularity filtering.

        Returns:
            A DataFrame with columns ``playlist_id`` and ``track_ids``; the
            latter is a space-separated string of track ids.  Playlists
            whose processing raised ``IndexError`` are left out.
        """
        print("Recommending... Am I filtering top_top songs?", filter_top_pop)

        final_prediction = {}

        e_r_slim_bpr = self.slim_bpr_recommender.get_estimated_ratings()
        e_r_slim_elasticNet = self.slim_elasticNet_recommender.get_estimated_ratings()

        estimated_ratings_final = e_r_slim_bpr + e_r_slim_elasticNet.multiply(omega)

        for counter, k in enumerate(playlist_ids):
            try:
                row = estimated_ratings_final[k]
                # Column indices ordered by decreasing blended score.
                indx = row.data.argsort()[::-1]
                aux = row.indices[indx]
                user_playlist = self.urm[k]

                # Append the most frequent tracks as a fallback tail.
                aux = np.concatenate((aux, self.top_pop_songs), axis=None)
                # Honour the list length configured in __init__ instead of
                # a literal 10, which silently ignored ``at``.
                top_songs = filter_seen(aux, user_playlist)[:self.at]

                final_prediction[k] = ' '.join(str(e) for e in top_songs)
            except IndexError:
                print("I don't have a value in the test_data")

            # NOTE(review): the "/10000" total is hard-coded in the message.
            if (counter % 1000) == 0:
                print("Playlist num", counter, "/10000")

        df = pd.DataFrame(list(final_prediction.items()),
                          columns=['playlist_id', 'track_ids'])
        return df