def fit(self, train_data):
    """Build the weighted content-based item-item similarity and the URM.

    :param train_data: dataframe with 'playlist_id' / 'track_id' interactions
    """
    print("Fitting...")
    # Optional feature re-weighting schemes applied to the ICM.
    if self.tf_idf:
        self.icm = normalize_tf_idf(self.icm)
    if self.bm25:
        self.icm = okapi_BM_25(self.icm)
    # self.icm = normalize(self.icm, norm='l2', axis=1)
    self.train_data = train_data
    # Most popular tracks — presumably used as a cold-start fallback; confirm in recommend().
    self.top_pop_songs = train_data['track_id'].value_counts().head(20).index.values
    # Per-feature weights for the similarity: album features first, then artist features.
    weights_album = np.full(self.num_album, self.weight_album)
    weights_artist = np.full(self.num_artists, self.weight_artist)
    row_weights = np.concatenate((weights_album, weights_artist), axis=0)
    if self.use_track_duration:
        # Duration-cluster features are appended after album/artist features.
        weights_clust = np.full(self.num_cluster_dur, self.weight_dur)
        row_weights = np.concatenate((row_weights, weights_clust), axis=0)
    self.cosine = Cosine_Similarity(self.icm.T, self.k, self.shrinkage,
                                    normalize=True, row_weights=row_weights)
    # self.cosine = Compute_Similarity_Python(self.icm.T, self.k, self.shrinkage, normalize=True)
    self.sym = check_matrix(self.cosine.compute_similarity(), 'csr')
    self.urm = buildURMMatrix(train_data)
def fit(self, train_data):
    """Fit the hybrid: content-based, user-user and item-item similarities.

    :param train_data: dataframe with 'playlist_id' / 'track_id' interactions
    """
    print('Fitting...')
    self.train_data = train_data
    self.top_pop_songs = train_data['track_id'].value_counts().head(20).index.values
    self.urm = buildURMMatrix(train_data)
    # from some tests, looks like k_con optimal = 40 with no particular shrink
    self.cosine_cbf = Cosine_Similarity(self.icm.T, self.k_con,
                                        self.shrinkage_con, normalize=True,
                                        mode=self.similarity_name,
                                        row_weights=None)
    self.cosine_col_u_u = Cosine_Similarity(self.urm.T, self.k_col_u_u,
                                            self.shrinkage_col_u_u,
                                            normalize=True,
                                            mode=self.similarity_name,
                                            row_weights=None)
    self.cosine_col_i_i = Cosine_Similarity(self.urm, self.k_col_i_i,
                                            self.shrinkage_col_i_i,
                                            normalize=True,
                                            mode=self.similarity_name,
                                            row_weights=None)
    # self.sym = check_matrix(cosine_similarity(self.urm, dense_output=False), 'csr')
    # Materialize the three similarity matrices in CSR form.
    self.sym_cbf = check_matrix(self.cosine_cbf.compute_similarity(), 'csr')
    self.sym_u_u = check_matrix(self.cosine_col_u_u.compute_similarity(), 'csr')
    self.sym_i_i = check_matrix(self.cosine_col_i_i.compute_similarity(), 'csr')
    # self.sym = check_matrix(self.cosine.compute(self.urm), 'csr')
    print("Sym mat completed")
def __init__(self, data, num_factors=50, lrate=0.01, reg=0.015, iters=10,
             init_mean=0.0, init_std=0.1, lrate_decay=1.0, rnd_seed=42):
    '''
    Initialize the model.

    :param data: interaction dataframe used to build the training URM
    :param num_factors: number of latent factors
    :param lrate: initial learning rate used in SGD
    :param reg: regularization term
    :param iters: number of iterations in training the model with SGD
    :param init_mean: mean used to initialize the latent factors
    :param init_std: standard deviation used to initialize the latent factors
    :param lrate_decay: learning rate decay
    :param rnd_seed: random seed
    '''
    super(AsySVD, self).__init__()
    self.num_factors = num_factors
    self.lrate = lrate
    self.reg = reg
    self.iters = iters
    self.init_mean = init_mean
    self.init_std = init_std
    self.lrate_decay = lrate_decay
    self.rnd_seed = rnd_seed
    # Sparse user-rating matrix built from the raw interactions.
    self.dataset = buildURMMatrix(data)
def __init__(self, train_data):
    """Store the training URM and the global top-popular tracks.

    :param train_data: dataframe with 'playlist_id' / 'track_id' interactions
    """
    super(P3alphaRecommender, self).__init__()
    # Top-20 most frequent tracks — presumably the cold-start fallback.
    self.top_pop_songs = train_data['track_id'].value_counts().head(20).index.values
    self.URM_train = buildURMMatrix(train_data)
    self.sparse_weights = True
def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK_bpr=200,
        l1_ratio=0.1, topK_elasticNet=300, alpha_elasticNet=0.0002,
        sgd_mode='sgd'):
    """Fit every sub-recommender of the hybrid on the same training data.

    :param train_data: dataframe with 'playlist_id' / 'track_id' interactions
    :param lambda_i: SLIM BPR regularization on positive items
    :param lambda_j: SLIM BPR regularization on negative items
    :param topK_bpr: neighborhood size for SLIM BPR
    :param l1_ratio: ElasticNet l1/l2 mixing parameter
    :param topK_elasticNet: neighborhood size for SLIM ElasticNet
    :param alpha_elasticNet: ElasticNet regularization strength
    :param sgd_mode: SGD variant used by the Cython BPR trainer
    """
    print('Fitting...')
    self.urm = buildURMMatrix(train_data)
    self.top_pop_songs = train_data['track_id'].value_counts().head(20).index.values
    # Neighborhood/content models consume the raw interaction dataframe.
    self.col_i_i_recommender.fit(train_data)
    self.col_u_u_recommender.fit(train_data)
    self.cbf_recommender.fit(train_data)
    # SLIM models are rebuilt from scratch on every fit call.
    self.slim_recommender = SLIM_BPR_Cython(train_data)
    self.slim_recommender.fit(lambda_i=lambda_i, lambda_j=lambda_j,
                              topK=topK_bpr, sgd_mode=sgd_mode)
    self.slim_elasticNet_recommender = SLIMElasticNetRecommender(train_data)
    self.slim_elasticNet_recommender.fit(l1_ratio=l1_ratio,
                                         topK=topK_elasticNet,
                                         alpha=alpha_elasticNet)
    # ALS works directly on the sparse URM.
    self.als_recommender.fit(self.urm)
def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK=200,
        sgd_mode='sgd'):
    """Fit the hybrid's sub-recommenders: item-item CF, CBF and SLIM BPR.

    :param train_data: dataframe with 'playlist_id' / 'track_id' interactions
    :param lambda_i: SLIM BPR regularization on positive items
    :param lambda_j: SLIM BPR regularization on negative items
    :param topK: neighborhood size for SLIM BPR
    :param sgd_mode: SGD variant used by the Cython BPR trainer
    """
    print('Fitting...')
    self.urm = buildURMMatrix(train_data)
    self.top_pop_songs = train_data['track_id'].value_counts().head(20).index.values
    self.col_i_i_recommender.fit(train_data)
    self.cbf_recommender.fit(train_data)
    # SLIM BPR is rebuilt from scratch on every fit call.
    self.slim_recommender = SLIM_BPR_Cython(train_data)
    self.slim_recommender.fit(lambda_i=lambda_i, lambda_j=lambda_j,
                              topK=topK, sgd_mode=sgd_mode)
def fit(self, train_data):
    """Fit the three underlying recommenders on the same interactions.

    :param train_data: dataframe with 'playlist_id' / 'track_id' interactions
    """
    self.urm = buildURMMatrix(train_data)
    self.top_pop_songs = train_data['track_id'].value_counts().head(20).index.values
    self.col_i_i_recommender.fit(train_data)
    self.col_u_u_recommender.fit(train_data)
    self.cbf_recommender.fit(train_data)
    print("All systems are fitted")
def __init__(self, train_data, recompile_cython=False, num_factors=200):
    """Set up the matrix-factorization model state.

    :param train_data: dataframe with 'playlist_id' / 'track_id' interactions
    :param recompile_cython: when True, rebuild the Cython extension first
    :param num_factors: number of latent factors
    """
    self.URM_train = buildURMMatrix(train_data)
    self.n_users, self.n_items = self.URM_train.shape
    self.normalize = False
    self.num_factors = num_factors
    if recompile_cython:
        print("Compiling in Cython")
        self.runCompilationScript()
        print("Compilation Complete")
def __init__(self, train_data, save=False, load_model=False,
             load_model_full=False):
    """Store the training URM plus model persistence flags.

    :param train_data: dataframe with 'playlist_id' / 'track_id' interactions
    :param save: when True, persist the model after training
    :param load_model: when True, load a previously saved model
    :param load_model_full: when True, load the full saved model variant
    """
    self.URM_train = buildURMMatrix(train_data)
    self.top_pop_songs = train_data['track_id'].value_counts().head(20).index.values
    self.save = save
    self.load_model = load_model
    self.load_model_full = load_model_full
def __init__(self, train_data, at=10):
    """Prepare the SVD-based recommender state.

    :param train_data: dataframe with 'playlist_id' / 'track_id' interactions
    :param at: recommendation list length (cutoff)
    """
    # CSR is faster during evaluation
    self.URM_train = check_matrix(buildURMMatrix(train_data), 'csr')
    self.n_users, self.n_items = self.URM_train.shape
    # Scoring is delegated to the SVD-specific implementation.
    self.compute_item_score = self.compute_score_SVD
    self.top_pop_songs = train_data['track_id'].value_counts().head(20).index.values
    self.at = at
def __init__(self, train_data, URM_validation=None, recompile_cython=True,
             final_model_sparse_weights=True, train_with_sparse_weights=False,
             symmetric=True):
    """Set up SLIM BPR training state and optionally rebuild the Cython code.

    :param train_data: dataframe with 'playlist_id' / 'track_id' interactions
    :param URM_validation: optional validation URM (copied, not referenced)
    :param recompile_cython: when True, rebuild the Cython extension first
    :param final_model_sparse_weights: keep the final similarity sparse
    :param train_with_sparse_weights: train directly with sparse weights
    :param symmetric: treat the similarity matrix as symmetric
    """
    # super(SLIM_BPR_Cython, self).__init__()
    self.URM_train = check_matrix(buildURMMatrix(train_data), 'csr')
    self.top_pop_songs = train_data['track_id'].value_counts().head(20).index.values
    self.n_users, self.n_items = self.URM_train.shape
    self.normalize = False
    self.train_with_sparse_weights = train_with_sparse_weights
    self.sparse_weights = final_model_sparse_weights
    # Keep a private copy so external mutation cannot affect validation.
    self.URM_validation = (URM_validation.copy()
                           if URM_validation is not None else None)
    if self.train_with_sparse_weights:
        # Sparse training forces sparse final weights.
        self.sparse_weights = True
    self.URM_mask = self.URM_train.copy()
    self.URM_mask.eliminate_zeros()
    self.symmetric = symmetric
    if not self.train_with_sparse_weights:
        # Dense training: warn about the memory footprint of the n_items^2
        # similarity (8 bytes per entry; halved if symmetric).
        n_items = self.URM_train.shape[1]
        requiredGB = 8 * n_items**2 / 1e+06
        if symmetric:
            requiredGB /= 2
        print("SLIM_BPR_Cython: Estimated memory required for similarity matrix of {} items is {:.2f} MB"
              .format(n_items, requiredGB))
    if recompile_cython:
        print("Compiling in Cython")
        self.runCompilationScript()
        print("Compilation Complete")
def fit(self, train_data):
    """Build the URM and the collaborative similarity matrix.

    :param train_data: dataframe with 'playlist_id' / 'track_id' interactions
    """
    print("Fitting...")
    self.train_data = train_data
    self.top_pop_songs = train_data['track_id'].value_counts().head(20).index.values
    self.urm = buildURMMatrix(train_data)
    if self.tf_idf:
        # TF-IDF is computed on the transposed URM, then transposed back.
        self.urm = normalize_tf_idf(self.urm.T).T
    self.cosine = Cosine_Similarity(self.urm.T, self.k, self.shrinkage,
                                    normalize=True)
    # self.sym = check_matrix(cosine_similarity(self.urm, dense_output=False), 'csr')
    self.sym = check_matrix(self.cosine.compute_similarity(), 'csr')
    # self.sym = check_matrix(self.cosine.compute(self.urm), 'csr')
    print("Sym mat completed")
def fit(self, train_data, init_URM=None):
    """Build (or reuse) the URM and compute the similarity matrix.

    :param train_data: dataframe with 'playlist_id' / 'track_id' interactions
    :param init_URM: optional pre-built URM to use instead of rebuilding it
    """
    print("Fitting...")
    self.train_data = train_data
    self.top_pop_songs = train_data['track_id'].value_counts().head(20).index.values
    # Callers may inject an already-built (possibly re-weighted) URM.
    self.urm = buildURMMatrix(train_data) if init_URM is None else init_URM
    if self.tf_idf:
        # TF-IDF is computed on the transposed URM, then transposed back.
        self.urm = normalize_tf_idf(self.urm.T).T
    self.cosine = Cosine_Similarity(self.urm, self.k, self.shrinkage,
                                    normalize=True)
    # self.cosine = Compute_Similarity_Python(self.urm, self.k, self.shrinkage, normalize=True)
    self.sym = check_matrix(self.cosine.compute_similarity(), 'csr')
def __init__(self, data):
    """Build the training URM from the raw interaction dataframe.

    :param data: dataframe with 'playlist_id' / 'track_id' interactions
    """
    super(FunkSVD, self).__init__()
    self.URM_train = buildURMMatrix(data)
from slimRS.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
from Base.Evaluation.Evaluator import SequentialEvaluator

# Search space for SLIM BPR hyper-parameter tuning.
hyperparamethers_range_dictionary = {}
hyperparamethers_range_dictionary["topK"] = [
    5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
]
# hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad", "adam"]
hyperparamethers_range_dictionary["lambda_i"] = [0.0, 1e-3, 1e-6, 1e-9]
hyperparamethers_range_dictionary["lambda_j"] = [0.0, 1e-3, 1e-6, 1e-9]

# How the search framework constructs and fits each candidate recommender.
recommenderDictionary = {
    DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [train_data],
    DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
        'URM_validation': buildURMMatrix(test_data)
    },
    DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
    DictionaryKeys.FIT_KEYWORD_ARGS: {
        "playlist_ids": target_data['playlist_id'],
        # Early stopping: validate every 5 epochs, stop after 5 drops.
        "validation_every_n": 5,
        "stop_on_validation": True,
        "lower_validatons_allowed": 5
    },
    DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary
}

evaluator_validation = SequentialEvaluator(buildURMMatrix(test_data),
                                           cutoff_list=[10])
evaluator_validation = EvaluatorWrapper(evaluator_validation)
import time
from parameterTunning.AbstractClassSearch import DictionaryKeys
from loader.loader import save_dataframe, train_data, target_data, test_data, tracks_data
from utils.auxUtils import Evaluator, buildURMMatrix, filter_seen
import pandas as pd
from slimRS.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
import matplotlib.pyplot as plt
from parameterTunning.GridSearch import GridSearch
from sklearn.model_selection import GridSearchCV

URM_train = buildURMMatrix(train_data)
URM_test = buildURMMatrix(test_data)

rs = SLIM_BPR_Cython(train_data)

# Hyper-parameter grid for the SLIM BPR model.
grid_param = {
    'lambda_i': [1e-1, 1e-2, 1e-3, 1e-4],
    'lambda_j': [1e-1, 1e-2, 1e-3, 1e-4],
    'topK': [300, 400, 500]
}

evaluator = Evaluator()


def _map_scorer(estimator, X, y=None):
    """Score a fitted candidate: MAP of its recommendations vs. test data."""
    predictions = estimator.recommend(target_data["playlist_id"])
    return evaluator.evaluate(predictions, test_data)


# BUG FIX: `scoring` must be a string or a callable (estimator, X, y) -> float.
# The original code passed the *result* of a single evaluate() call, computed
# eagerly on the not-yet-fitted recommender before the search even started, so
# every grid point would have received the same (meaningless) score.
gd_sr = GridSearchCV(estimator=rs,
                     param_grid=grid_param,
                     scoring=_map_scorer,
                     n_jobs=2)

gd_sr.fit(URM_train)
from loader.loader import train_data, test_data, tracks_data, full_data, target_data, save_dataframe
from utils.auxUtils import buildURMMatrix, Evaluator
import numpy as np
from svdRS.pureSVD import PureSVDRecommender
from collaborative_filtering_RS.col_user_userRS import ColBfUURS
from collaborative_filtering_RS.col_item_itemRS import ColBfIIRS
from MatrixFactorization.mf_skl import MfNnz
from cbfRS.cbfRS import CbfRS
import matplotlib.pyplot as pyplot
from slimRS.slimElasticNet import SLIMElasticNetRecommender
from slimRS.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
from utils.auxUtils import buildICMMatrix
from FW_boosting.CFW_D_Similarity import CFW_D_Similarity_Linalg

URM_train = buildURMMatrix(train_data)
evaluator = Evaluator()

# Per-user profile lengths (CSR row lengths), used to bucket users
# into 5% blocks sorted by activity.
profile_length = np.ediff1d(URM_train.indptr)
block_size = int(len(profile_length) * 0.05)
sorted_users = np.argsort(profile_length)

# Item-item collaborative filtering baseline.
rs_i_i_cf = ColBfIIRS(10, 750, 50, tf_idf=True)
rs_i_i_cf.fit(train_data)
# predictions_item_item = rs_i_i_cf.recommend(target_data['playlist_id'])
map_item_item = []
'''
rs_u_u_cf = ColBfUURS(10, 200, 50, tf_idf=True)
rs_u_u_cf.fit(train_data)
predictions_user_user = rs_u_u_cf.recommend(target_data['playlist_id'])
map_user_user = []
'''
rs_content = CbfRS(tracks_data, 10, 10, 10, tf_idf=True)
from lightfm import LightFM
from lightfm.data import Dataset
from loader.loader import test_data, train_data, target_data, tracks_data
from utils.auxUtils import buildFMMatrix, buildURMMatrix, Evaluator
from scipy.sparse import coo_matrix
import numpy as np
import pandas as pd
from tqdm import tqdm

e = Evaluator()

# Instantiate and train the model
alpha = 1e-3
model = LightFM(no_components=30, loss='warp', learning_rate=0.01)
# todo add latent factors weights
# todo force the dimention of the data matrix
urm = coo_matrix(buildURMMatrix(train_data))
print('Fitting...')
# BUG FIX: the original call passed item_features=item_feature, but
# `item_feature` is never defined anywhere in this script, so the run
# crashed with a NameError before training. Train on interactions only
# until the item-feature matrix is actually built.
# todo: item features
model.fit(urm, epochs=30, num_threads=4)

final_prediction = {}
tracks = np.array(tracks_data['track_id'], dtype='int32')
for k in tqdm(target_data['playlist_id']):
    # user_index = np.full(len(tracks), k, dtype='int32')
    predictions = model.predict(k, tracks)
    # Indices of the 10 highest-scored tracks, best first.
    ranking = (np.argsort(predictions)[::-1])[:10]
    # Renamed loop variable (was `e`) to avoid shadowing the Evaluator.
    string = ' '.join(str(track) for track in ranking)
    final_prediction.update({k: string})

df = pd.DataFrame(list(final_prediction.items()),
                  columns=['playlist_id', 'track_ids'])
print(df)
from loader.loader import train_data, test_data, tracks_data, full_data, target_data
from utils.auxUtils import buildURMMatrix, Evaluator
import numpy as np
from svdRS.pureSVD import PureSVDRecommender
from collaborative_filtering_RS.col_user_userRS import ColBfUURS
from collaborative_filtering_RS.col_item_itemRS import ColBfIIRS
from MatrixFactorization.mf_skl import MfNnz
from cbfRS.cbfRS import CbfRS
import matplotlib.pyplot as pyplot
from slimRS.slimElasticNet import SLIMElasticNetRecommender
from slimRS.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython

# NOTE: the URM here is built from full_data (train + test merged) —
# presumably intentional for a final submission run; verify before reuse.
URM_train = buildURMMatrix(full_data)
evaluator = Evaluator()

# Per-user profile lengths (CSR row lengths), used to bucket users
# into 5% blocks sorted by activity.
profile_length = np.ediff1d(URM_train.indptr)
block_size = int(len(profile_length) * 0.05)
sorted_users = np.argsort(profile_length)

# Item-item collaborative filtering.
rs_i_i_cf = ColBfIIRS(10, 750, 50, tf_idf=True)
rs_i_i_cf.fit(train_data)
predictions_item_item = rs_i_i_cf.recommend(target_data['playlist_id'])
map_item_item = []

# User-user collaborative filtering.
rs_u_u_cf = ColBfUURS(10, 200, 50, tf_idf=True)
rs_u_u_cf.fit(train_data)
predictions_user_user = rs_u_u_cf.recommend(target_data['playlist_id'])
map_user_user = []

# Content-based filtering.
rs_content = CbfRS(tracks_data, 10, 10, 10, tf_idf=True)
rs_content.fit(train_data)
predictions_content = rs_content.recommend(target_data['playlist_id'])
r.fit(buildURMMatrix(train_data)) pred = r.recommend(target_data['playlist_id']) temp_map = e.evaluate(pred, test_data) print("Alpha: ", a) print("Reg: ", reg) print("Num factors: ", nf) print("MAP: ", temp_map) ''' ''' for r in regs: rs = IALS_numpy(num_factors=250, reg=r) e = Evaluator() rs.fit(buildURMMatrix(train_data)) pred = rs.recommend(target_data['playlist_id']) temp_map = e.evaluate(pred, test_data) print("Regularization: ", r) print("MAP: ", temp_map) ''' rs = IALS_numpy(num_factors=2, reg=100) e = Evaluator() rs.fit(buildURMMatrix(train_data)) print("GOING") print(rs.get_estimated_ratings()) pred = rs.recommend(target_data['playlist_id']) e.evaluate(pred, test_data) save_dataframe('output/als_250_100_reg_factors.csv', sep=',', dataframe=pred)