def recommend_batch(self, userids, urm=None, N=10, filter_already_liked=True, with_scores=False, items_to_exclude=[], verbose=False):
    """
    Recommend the top-N items for a batch of users by computing R^ = R•S.

    :param userids: row indices of the users to recommend for. None means
        "all the rows of the matrix"; an empty sequence returns [] immediately
    :param urm: matrix the user profiles are taken from; when None,
        data.get_urm() is used
    :param N(int): forwarded to self._extract_top_items
    :param filter_already_liked(bool): when True, the scores of the non-zero
        cells of the user profiles are set to -inf
    :param with_scores(bool): accepted for interface compatibility, not used here
    :param items_to_exclude: indices whose scores are set to -inf for every user;
        only its length is read and it is never mutated, so the shared default
        list is harmless
    :param verbose(bool): accepted for interface compatibility, not used here
    :return: None when the model has not been fit, otherwise the result of
        self._insert_userids_as_first_col(...).tolist()
        (presumably one [userid, item, ...] row per user - TODO confirm)
    """
    if not self._has_fit():
        return None

    if userids is None:
        print('Recommending for all users...')
        matrix = urm if urm is not None else data.get_urm()
        # One id per profile row: the id column added at the end needs real
        # ids, passing None through made the "all users" case unusable.
        userids = list(range(matrix.shape[0]))
    elif len(userids) > 0:
        matrix = urm[userids] if urm is not None else data.get_urm()[userids]
    else:
        return []

    # compute the R^ by multiplying R•S
    self.r_hat = matrix * self._sim_matrix

    if filter_already_liked:
        # items already in the user profile must never be recommended
        self.r_hat[matrix.nonzero()] = -np.inf

    if len(items_to_exclude) > 0:
        # TO-DO (from the original author): test this part because it does not work!
        # The matrix is transposed so that the excluded items can be addressed as rows.
        self.r_hat = self.r_hat.T
        self.r_hat[items_to_exclude] = -np.inf
        self.r_hat = self.r_hat.T

    recommendations = self._extract_top_items(self.r_hat, N=N)
    return self._insert_userids_as_first_col(userids, recommendations).tolist()
def __init__(self):
    """
    Build the two similarity matrices stored on the instance: a collaborative
    item-based one (S_matrix_target) and a content-based one
    (S_matrix_contentKNN). Each is converted to CSR and divided by its own
    largest entry.
    """
    self.name = "CFW_D_Similarity_Linalg"

    # best item based similarity collaborative filter
    collaborative_params = dict(distance=DistanceBasedRecommender.SIM_SPLUS,
                                shrink=10, alpha=0.25, beta=0.5, l=0.25, c=0.5)
    collaborative_sim = CFItemBased().fit(d.get_urm(), 600, **collaborative_params).tocsr()
    # rescale by the largest entry (the original author reports a little push
    # in performances from this normalization)
    self.S_matrix_target = collaborative_sim / collaborative_sim.max()

    # best similarity content based
    content_params = dict(k=500, distance=DistanceBasedRecommender.SIM_SPLUS,
                          shrink=500, alpha=0.75, beta=1, l=0.5, c=0.5)
    content_sim = ContentBasedRecommender().fit(d.get_urm(), d.get_icm(), **content_params).tocsr()
    # same rescaling as for the collaborative similarity
    self.S_matrix_contentKNN = content_sim / content_sim.max()
def run(self, epochs=70, batch_size=1000, lambda_i=0.0, lambda_j=0.0, learning_rate=0.01, topK=1500, sgd_mode='adagrad', export_results=True, export_r_hat=False):
    """
    Shortcut to train the model on d.get_urm() once validation is over and
    then export either the recommendations or the estimated matrix.

    :param epochs(int) number of training epochs
    :param batch_size(int) after how many items the params should be updated
    :param lambda_i(float) first regularization term
    :param lambda_j(float) second regularization term
    :param learning_rate(float) algorithm learning rate
    :param topK(int) how many elements should be taken into account while computing URM*W
    :param sgd_mode(string) optimization algorithm
    :param export_results(bool) export a csv with the predicted items for the target playlists
    :param export_r_hat(bool) save the estimated matrix; only looked at when
           export_results is False
    """
    hyperparams = dict(epochs=epochs, batch_size=batch_size, lambda_i=lambda_i,
                       lambda_j=lambda_j, learning_rate=learning_rate,
                       topK=topK, sgd_mode=sgd_mode)

    # start_validation_after_N_epochs is set past the last epoch, so the
    # validation threshold is never reached during this run
    self.fit(URM_train=d.get_urm(), URM_test=None, user_ids=None,
             validate_every_N_epochs=1,
             start_validation_after_N_epochs=epochs + 1,
             **hyperparams)

    if export_results:
        print('exporting results')
        recs = self.recommend_batch(d.get_target_playlists(), N=10, urm=d.get_urm(),
                                    filter_already_liked=True, with_scores=False,
                                    items_to_exclude=[], verbose=False)
        importexport.exportcsv(recs, 'submission', self._print(**hyperparams))
        return

    if export_r_hat:
        print('saving estimated urm')
        self.save_r_hat()
def option_selection_evaluation(type):
    """
    Ask the user, on stdin, what to do with the model output and return the
    matching configuration.

    :param type(str) 'SIM' or 'R_HAT'. The name shadows the builtin but is kept
           because callers may pass it by keyword.
    :return: for 'SIM':   (name, urm_filter_tracks, rel_path)
             for 'R_HAT': (name, urm_filter_tracks, rel_path, EXPORT)
             None for any other value of type.
             name and rel_path are None when nothing has to be saved.

    An unrecognised option (including an empty line) logs a warning and
    terminates the process with exit(0).
    """
    if type == 'SIM':
        # LET USER CHOOSE OPTIONS
        log.success('STUDY HARD | WORK HARD | F**K HARD |')
        log.warning('\'s\' for save the r_hat in saved_r_hat_evaluation')
        log.warning('\'m\' for compute the MAP@10')
        # [:1] instead of [0]: an empty line must fall into the "invalid
        # option" branch instead of raising IndexError
        option = input()[:1]
        if option not in ('s', 'm'):
            log.warning('Invalid option {!r}: expected \'s\' or \'m\''.format(option))
            exit(0)
        # both options work on the train split
        urm_filter_tracks = data.get_urm_train_1()
        if option == 's':
            rel_path = 'saved_r_hat_evaluation/'
            log.success('SELECT A NAME FOR THE MATRIX')
            name = input()
        else:
            rel_path = None
            name = None
        return name, urm_filter_tracks, rel_path

    if type == 'R_HAT':
        # LET USER CHOOSE OPTIONS
        log.success('STUDY HARD | WORK HARD | F**K HARD |')
        log.warning('\'s\' for save the r_hat in saved_r_hat')
        log.warning('\'e\' for EXPORT and get a SUB')
        option = input()[:1]
        if option not in ('s', 'e'):
            log.warning('Invalid option {!r}: expected \'s\' or \'e\''.format(option))
            exit(0)
        if option == 's':
            log.success('SELECT A NAME FOR THE MATRIX')
            rel_path = 'saved_r_hat/'
            EXPORT = False
        else:
            log.success('SELECT A NAME FOR THE SUB')
            rel_path = None
            EXPORT = True
        name = input()
        # both options work on the full URM
        urm_filter_tracks = data.get_urm()
        return name, urm_filter_tracks, rel_path, EXPORT

    # unknown type: same result as the original implicit fall-through
    return None
def fit(self, URM, n_factors=10, learning_rate=1e-4, epochs=10, user_regularization=0.001, positive_item_regularization=0.001, negative_item_regularization=0.001, evaluate_every=1):
    """
    Train the MFBPR model and print a sanity check on the test set.

    :param URM: training matrix; its shape gives n_users x n_items
    :param n_factors(int) forwarded to MFBPR_Epoch
    :param learning_rate(float) forwarded to MFBPR_Epoch
    :param epochs(int) number of calls to epochIteration()
    :param user_regularization(float) forwarded to MFBPR_Epoch
    :param positive_item_regularization(float) forwarded to MFBPR_Epoch
    :param negative_item_regularization(float) forwarded to MFBPR_Epoch
    :param evaluate_every(int) the model is evaluated on d.get_urm_test_1()
           every this many epochs

    Side effects: sets self.URM, self.epochs, self.n_users, self.n_items,
    self.user_factors and self.item_factors; prints progress to stdout.
    """
    self.URM = URM
    self.epochs = epochs
    self.n_users = self.URM.shape[0]
    self.n_items = self.URM.shape[1]

    e = MFBPR_Epoch(
        URM,
        n_factors=n_factors,
        learning_rate=learning_rate,
        user_regularization=user_regularization,
        positive_item_regularization=positive_item_regularization,
        negative_item_regularization=negative_item_regularization)

    print('Fitting MFBPR...')
    for numEpoch in range(self.epochs):
        print('Epoch:', numEpoch)
        e.epochIteration()
        if (numEpoch + 1) % evaluate_every == 0:
            # the factors must be refreshed before recommending
            self.user_factors, self.item_factors = e.get_user_item_factors()
            recs = self.recommend_batch(userids=d.get_target_playlists())
            self.evaluate(recs, d.get_urm_test_1())

    self.user_factors, self.item_factors = e.get_user_item_factors()

    # Sanity check on the test set: for a random user, a positive item taken
    # from the test set should score higher than a random item the user has
    # never interacted with.
    trials = 10000
    # loop-invariant: fetched once instead of once per trial
    test = d.get_urm_test_1()
    seen = d.get_urm()
    count_wrong = 0
    evaluated = 0
    for _ in range(trials):
        user_id = np.random.choice(self.n_users)
        test_items = test[user_id, :].indices
        if len(test_items) == 0:
            # np.random.choice raises on an empty array: users without test
            # interactions cannot provide a positive sample, skip them
            continue
        user_seen_items = seen[user_id, :].indices
        pos_item_id = np.random.choice(test_items)

        # draw until the negative item is one the user has not seen
        neg_item_id = np.random.randint(0, self.n_items)
        while neg_item_id in user_seen_items:
            neg_item_id = np.random.randint(0, self.n_items)

        xui = np.dot(self.user_factors[user_id, :], self.item_factors[pos_item_id, :])
        xuj = np.dot(self.user_factors[user_id, :], self.item_factors[neg_item_id, :])
        evaluated += 1
        if xui - xuj < 0:
            count_wrong += 1

    if evaluated > 0:
        # equals count_wrong / trials whenever no trial had to be skipped
        print('percentange of wrong preferences in test set: {}'.format(
            count_wrong / evaluated))
    else:
        print('no sampled user has interactions in the test set, preference check skipped')
def recommend_batch(self, userids, N=10, urm=None, filter_already_liked=True, with_scores=False, items_to_exclude=[], verbose=False):
    """
    Recommend the top-N items for a batch of users from R^ = R•S (or S•R).

    :param userids: row indices of the target users; None or an empty
        sequence means "all the rows of R^"
    :param N(int): forwarded to self._extract_top_items
    :param urm: matrix R to multiply; when None, data.get_urm() is used
    :param filter_already_liked(bool): when True, the cells of R^ that are
        non-zero in R are set to -inf
    :param with_scores(bool): accepted for interface compatibility, not used here
    :param items_to_exclude: indices whose scores are set to -inf for every user;
        only its length is read and it is never mutated
    :param verbose(bool): forwarded to sim.dot_product
    :return: None when the model has not been fit, otherwise the result of
        self._insert_userids_as_first_col(...).tolist()
        (presumably one [userid, item, ...] row per user - TODO confirm)
    """
    if not self._has_fit():
        return None

    R = data.get_urm() if urm is None else urm

    # Evaluated once and None-safe: the original called len(userids) further
    # down even though None is an accepted value here.
    has_targets = userids is not None and len(userids) > 0
    if not has_targets:
        print('Recommending for all users...')

    # compute the R^ by multiplying: R•S or S•R
    if self._matrix_mul_order == 'inverse':
        left, right = self._sim_matrix, R
    else:
        left, right = R, self._sim_matrix
    R_hat = sim.dot_product(left, right, target_rows=userids, k=R.shape[0],
                            format_output='csr', verbose=verbose)

    if filter_already_liked:
        # remove from the R^ the items already in the R
        R_hat[R.nonzero()] = -np.inf

    if len(items_to_exclude) > 0:
        # TO-DO (from the original author): test this part because it does not work!
        # The matrix is transposed so that the excluded items can be addressed as rows.
        R_hat = R_hat.T
        R_hat[items_to_exclude] = -np.inf
        R_hat = R_hat.T

    # make recommendations only for the target rows
    if has_targets:
        R_hat = R_hat[userids]
    else:
        userids = list(range(R_hat.shape[0]))

    recommendations = self._extract_top_items(R_hat, N=N)
    return self._insert_userids_as_first_col(userids, recommendations).tolist()
def run(self, normalize_similarity=False, add_zeros_quota=1, loss_tolerance=1e-6, iteration_limit=30, damp_coeff=1, use_incremental=False, export_results=True, export_r_hat=False, export_for_validation=False):
    """
    Fit the model and export either the recommendations for the target
    playlists or the estimated matrix.

    The model is trained on d.get_urm_train_1() only when both export_r_hat
    and export_for_validation are set, on d.get_urm() otherwise.

    :param normalize_similarity, add_zeros_quota, loss_tolerance,
           iteration_limit, damp_coeff, use_incremental: forwarded unchanged
           to self.fit and to self._print
    :param export_results(bool) export the csv with the recommendations
    :param export_r_hat(bool) save the estimated matrix; only looked at when
           export_results is False
    :param export_for_validation(bool) forwarded to self.save_r_hat
    """
    train_on_split = export_r_hat and export_for_validation
    urm = d.get_urm_train_1() if train_on_split else d.get_urm()

    hyperparams = dict(normalize_similarity=normalize_similarity,
                       add_zeros_quota=add_zeros_quota,
                       loss_tolerance=loss_tolerance,
                       iteration_limit=iteration_limit,
                       damp_coeff=damp_coeff,
                       use_incremental=use_incremental)

    self.fit(ICM=d.get_icm(), URM_train=urm, **hyperparams)

    if export_results:
        print('exporting results')
        recs = self.recommend_batch(d.get_target_playlists(), N=10, urm=urm,
                                    filter_already_liked=True, with_scores=False,
                                    items_to_exclude=[], verbose=False)
        importexport.exportcsv(recs, 'submission', self._print(**hyperparams))
        return

    if export_r_hat:
        print('saving estimated urm')
        self.save_r_hat(export_for_validation)
def fit(self, urm_train=None, factors=550, regularization=0.15, iterations=300, alpha=25):
    """
    Train the ALS model, finding the two matrices U and V such that U*V.T
    estimates the URM.

    Parameters
    ----------
    :param (csr) urm_train: The URM matrix of shape (number_users, number_items).
                            When None, data.get_urm() is loaded at call time.
    :param (int) factors: How many latent features we want to compute.
    :param (float) regularization: lambda_val regularization value
    :param (int) iterations: How many times we alternate between fixing and
                             updating our user and item vectors
    :param (int) alpha: The rate in which we'll increase our confidence in a
                        preference with more interactions.

    Returns
    -------
    Nothing. The factors are stored in self.user_vecs and self.item_vecs,
    the fitted model in self._model and the training matrix in self.urm.
    """
    # Resolved here and not in the signature: a default of data.get_urm()
    # is evaluated once, when the function is defined, which loads the data
    # at import time.
    if urm_train is None:
        urm_train = data.get_urm()
    self.urm = urm_train

    sparse_item_user = self.urm.T

    # Initialize the als model and fit it using the sparse item-user matrix
    self._model = implicit.als.AlternatingLeastSquares(
        factors=factors, regularization=regularization, iterations=iterations)

    # Calculate the confidence by multiplying it by our alpha value.
    data_conf = (sparse_item_user * alpha).astype('double')

    # Fit the model
    # NOTE(review): the orientation expected by implicit's fit() differs
    # between library versions - confirm that item-user is right for the
    # installed one, otherwise user and item factors end up swapped.
    self._model.fit(data_conf)

    # set the user and item vectors for our model R = user_vecs * item_vecs.T
    self.user_vecs = self._model.user_factors
    self.item_vecs = self._model.item_factors
def test(self, distance=DistanceBasedRecommender.SIM_SPLUS, k=600, shrink=10, threshold=0, alpha=0.25, beta=0.5, l=0.5, c=0.25, export_results=True, export_r_hat=False):
    """
    meant as a shortcut to run the model after the validation procedure,
    allowing the export of the scores on the playlists or of the estimated
    csr matrix

    :param distance, k, shrink, threshold, alpha, beta, l, c: forwarded
           unchanged to self.run
    :param export_results(bool) forwarded to self.run as `export`
    :param export_r_hat(bool) when True, self.save_r_hat() is called after the run
    :return: the (recs, map) pair returned by self.run

    NOTE(review): export_results and export_r_hat were read but never defined
    in this method. They are now keyword parameters whose defaults mirror the
    other run() shortcuts of the project - confirm they were not meant to be
    module-level globals.
    """
    # map10 instead of map: do not shadow the builtin
    recs, map10 = self.run(urm=d.get_urm(),
                           icm=d.get_icm(),
                           targetids=d.get_target_playlists(),
                           distance=distance,
                           k=k,
                           shrink=shrink,
                           threshold=threshold,
                           alpha=alpha,
                           beta=beta,
                           l=l,
                           c=c,
                           export=export_results)
    if export_r_hat:
        print('saving estimated urm')
        self.save_r_hat()
    return recs, map10
def run(self, urm_train=None, urm=None, urm_test=None, targetids=None, factors=100, regularization=0.01, iterations=100, alpha=25, with_scores=False, export=True, verbose=True):
    """
    Run the model and export the results to a file

    Parameters
    ----------
    :param urm_train, urm: accepted for interface compatibility; this
           implementation does not use them
    :param urm_test: matrix the recommendations are evaluated against;
           data.get_urm_test_1() when None
    :param targetids: users to recommend for; data.get_target_playlists()
           when None
    :param factors, regularization, iterations, alpha: accepted for interface
           compatibility; self.fit is called with fixed hyper-parameters
    :param (bool) with_scores: forwarded to self.recommend_batch
    :param (bool) export: when True the recommendations are written through exportcsv
    :param (bool) verbose: forwarded to the helpers; also enables the timing log

    Returns
    -------
    :return: recs: (list) recommendations
    :return: map10: (float) MAP10 for the provided recommendations, None when
             there are no recommendations to evaluate
    """
    start = time.time()

    # Only the datasets that are really used get loaded, and only when the
    # caller did not provide them (the original loaded five on every call).
    if urm_test is None:
        urm_test = data.get_urm_test_1()
    if targetids is None:
        targetids = data.get_target_playlists()

    self.fit(l1_ratio=0.1, positive_only=True, alpha=1e-4, fit_intercept=False,
             copy_X=False, precompute=False, selection='random', max_iter=100,
             topK=100, tol=1e-4, workers=multiprocessing.cpu_count())

    recs = self.recommend_batch(userids=targetids, with_scores=with_scores, verbose=verbose)

    map10 = None
    if len(recs) > 0:
        map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
    else:
        log.warning('No recommendations available, skip evaluation')

    if export:
        exportcsv(recs, path='submission', name=self.name, verbose=verbose)

    if verbose:
        log.info('Run in: {:.2f}s'.format(time.time() - start))

    return recs, map10
# Interactive entry point: 't' evaluates on the train/test split, 'r' saves
# the estimated matrix R^, 's' fits the model for the similarity matrix.
# [:1] instead of [0]: an empty line simply matches no option instead of
# raising IndexError.
arg = input()[:1]
print()
if arg == 't':
    model = P3alphaRecommender(data.get_urm_train_1())
    model.fit(topK=900, alpha=1.2, min_rating=0, implicit=True, normalize_similarity=False)
    recs = model.recommend_batch(data.get_target_playlists())
    evaluate(recs, test_urm=data.get_urm_test_1())
elif arg == 'r':
    log.info('Wanna save for evaluation (y/n)?')
    if input()[:1] == 'y':
        # An R^ meant for evaluation must come from the train split so that
        # it can be scored against the held-out test set. The original had
        # the two matrices swapped between the branches.
        model = P3alphaRecommender(data.get_urm_train_1())
        path = 'raw_data/saved_r_hat_evaluation/'
    else:
        model = P3alphaRecommender(data.get_urm())
        path = 'raw_data/saved_r_hat/'
    model.fit(topK=500, alpha=1.7, min_rating=1, normalize_similarity=True)
    print('Saving the R^...')
    # .dot() instead of np.dot(): np.dot is not aware of scipy sparse matrices
    r_hat = sps.csr_matrix(
        model.URM_train[data.get_target_playlists()].dot(model.W_sparse))
    sps.save_npz(path + model.RECOMMENDER_NAME, r_hat)
elif arg == 's':
    # NOTE(review): this branch used `model` without creating it. The full
    # URM is assumed here, as the other similarity-matrix dumps of the
    # project do - confirm.
    model = P3alphaRecommender(data.get_urm())
    model.fit(topK=500, alpha=1.7, min_rating=1, normalize_similarity=True)
    print('Saving the similarity matrix...')
# Script: fit a SLIM ElasticNet recommender on the full URM and save its
# estimated matrix through save_r_hat().
from recommenders.collaborative_filtering.SLIM_RMSE import SLIMElasticNetRecommender
import data.data as d
import inout.importexport as io

# Datasets. Only `urm` is used by the active code below; the others are
# referenced only by the commented-out evaluation/export lines at the bottom.
urm = d.get_urm()
urm_train = d.get_urm_train()
target_id = d.get_all_playlists()
urm_test = d.get_urm_test()
t_id = d.get_target_playlists()

recommender = SLIMElasticNetRecommender(urm)
recommender.fit(topK=100, alpha=1e-4, l1_ratio=0.1, max_iter=100, tol=1e-4)
recommender.save_r_hat()

# Disabled: recommend for the target playlists, print the MAP@10 and export
# the recommendations as csv.
#recommendations = recommender.recommend_batch(userids=t_id)
#map10 = recommender.evaluate(recommendations, test_urm=urm_test)
#print('map@10: {}'.format(map10))
#io.exportcsv(recommendations, path='submissions', name='slim_rmse')
for ui in use_incremental: print( self._print(normalize_similarity=ns, add_zeros_quota=dc, loss_tolerance=lt, iteration_limit=il, damp_coeff=dc, use_incremental=ui)) self.fit(ICM=d.get_icm(), URM_train=d.get_urm_train_1(), normalize_similarity=ns, add_zeros_quota=adq, loss_tolerance=lt, iteration_limit=il, damp_coeff=dc, use_incremental=ui) recs = self.recommend_batch( user_ids, urm=d.get_urm_train_1()) r.evaluate(recs, d.get_urm_test_1()) if log_path != None: sys.stdout = orig_stdout f.close() #0.039 r = CFW() r.fit(URM_train=data.get_urm()) sps.save_npz('raw_data/saved_sim_matrix/CFW', r.W_sparse) # r.run(export_results=False, export_r_hat=True, export_for_validation=False)
MAP@k: (float) MAP for the provided recommendations """ if not at_k > 0: log.error('Invalid value of k {}'.format(at_k)) return aps = 0.0 for r in recommendations: row = test_urm.getrow(r[0]).indices m = min(at_k, len(row)) ap = 0.0 n_elems_found = 0.0 for j in range(1, m + 1): if r[j] in row: n_elems_found += 1 ap = ap + n_elems_found / j if m > 0: ap = ap / m aps = aps + ap result = aps / len(recommendations) if verbose: log.warning('MAP: {}'.format(result)) return result rec = RP3betaRecommender(data.get_urm()) rec.fit() sps.save_npz('raw_data/saved_sim_matrix/RP3BETA', rec.W_sparse) #recs = rec.recommend_batch(data.get_target_playlists()) #rec.evaluate(recs, test_urm=data.get_urm_test_1())