Example no. 1
    def recommend_batch(self, userids, urm=None, N=10, filter_already_liked=True, with_scores=False, items_to_exclude=[],
                        verbose=False):
        if not self._has_fit():
            return None

        if userids is not None:
            if len(userids) > 0:
                matrix = urm[userids] if urm is not None else data.get_urm()[userids]
            else:
                return []
        else:
            print('Recommending for all users...')
            matrix = urm if urm is not None else data.get_urm()

        # compute the R^ by multiplying R•S
        self.r_hat = matrix * self._sim_matrix
        
        if filter_already_liked:
            user_profile_batch = matrix
            self.r_hat[user_profile_batch.nonzero()] = -np.inf
        if len(items_to_exclude) > 0:
            # TODO: this exclusion filter is known to be broken and still has to be fixed and tested
            self.r_hat = self.r_hat.T
            self.r_hat[items_to_exclude] = -np.inf
            self.r_hat = self.r_hat.T
        
        recommendations = self._extract_top_items(self.r_hat, N=N)
        return self._insert_userids_as_first_col(userids, recommendations).tolist()
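A minimal usage sketch for recommend_batch above, assuming a recommender that inherits it and has already been fit; the CFItemBased class and the data module names are borrowed from the other examples, while the import path and the exact fit arguments are assumptions.

import data.data as data
from recommenders.collaborative_filtering.itembased import CFItemBased   # hypothetical import path

rec = CFItemBased()
rec.fit(data.get_urm_train_1(), 600)          # assumed: k=600, remaining hyperparameters at their defaults
recs = rec.recommend_batch(data.get_target_playlists(),
                           urm=data.get_urm_train_1(),
                           N=10,
                           filter_already_liked=True)
print(recs[0])                                # [userid, item_1, ..., item_10]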
Example no. 2
    def __init__(self):
        self.name = "CFW_D_Similarity_Linalg"

        # best item based similarity collaborative filter
        item = CFItemBased()
        sim_item = item.fit(d.get_urm(),
                            600,
                            distance=DistanceBasedRecommender.SIM_SPLUS,
                            shrink=10,
                            alpha=0.25,
                            beta=0.5,
                            l=0.25,
                            c=0.5).tocsr()
        # normalization: similarity values now lie between 0 and 1; gives a small boost in performance
        self.S_matrix_target = sim_item / sim_item.max()

        # best similarity content based
        content = ContentBasedRecommender()
        sim_content = content.fit(d.get_urm(),
                                  d.get_icm(),
                                  k=500,
                                  distance=DistanceBasedRecommender.SIM_SPLUS,
                                  shrink=500,
                                  alpha=0.75,
                                  beta=1,
                                  l=0.5,
                                  c=0.5).tocsr()
        # normalization: similarity values now lie between 0 and 1; gives a small boost in performance
        self.S_matrix_contentKNN = sim_content / sim_content.max()
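The normalization above simply divides every similarity by the global maximum; a tiny standalone illustration with scipy (values invented):

import numpy as np
import scipy.sparse as sps

S = sps.csr_matrix(np.array([[0.0, 2.0],
                             [4.0, 1.0]]))
S_norm = S / S.max()              # all similarity values now lie in [0, 1]
print(S_norm.toarray())           # [[0. 0.5], [1. 0.25]]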
Example no. 3
    def run(self,
            epochs=70,
            batch_size=1000,
            lambda_i=0.0,
            lambda_j=0.0,
            learning_rate=0.01,
            topK=1500,
            sgd_mode='adagrad',
            export_results=True,
            export_r_hat=False):
        """
        meant as a shortcut to run the model after the validation procedure,
        allowing the export of the scores on the playlists or of the estimated csr matrix

        :param epochs(int)
        :param batch_size(int) after how many items the params should be updated
        :param lambda_i(float) first regularization term
        :param lambda_j(float) second regularization term
        :param learning_rate(float) algorithm learning rate
        :param topK(int) how many elements should be taken into account while computing URM*W
        :param sgd_mode(string) optimization algorithm
        :param export_results(bool) export a ready-to-kaggle csv with the predicted songs for each playlist
        :param export_r_hat(bool) whether to export or not the estimated csr matrix
        """

        self.fit(URM_train=d.get_urm(),
                 epochs=epochs,
                 URM_test=None,
                 user_ids=None,
                 batch_size=batch_size,
                 validate_every_N_epochs=1,
                 start_validation_after_N_epochs=epochs + 1,
                 lambda_i=lambda_i,
                 lambda_j=lambda_j,
                 learning_rate=learning_rate,
                 topK=topK,
                 sgd_mode=sgd_mode)
        if export_results:
            print('exporting results')
            recs = self.recommend_batch(d.get_target_playlists(),
                                        N=10,
                                        urm=d.get_urm(),
                                        filter_already_liked=True,
                                        with_scores=False,
                                        items_to_exclude=[],
                                        verbose=False)
            importexport.exportcsv(
                recs, 'submission',
                self._print(epochs=epochs,
                            batch_size=batch_size,
                            lambda_i=lambda_i,
                            lambda_j=lambda_j,
                            learning_rate=learning_rate,
                            topK=topK,
                            sgd_mode=sgd_mode))
        elif export_r_hat:
            print('saving estimated urm')
            self.save_r_hat()
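A hypothetical invocation of the run shortcut above; SLIM_BPR is a stand-in name for the class that defines it, and only the keyword arguments come from the signature.

model = SLIM_BPR()                 # hypothetical class name: any recommender defining run() works
model.run(epochs=70,
          batch_size=1000,
          learning_rate=0.01,
          topK=1500,
          sgd_mode='adagrad',
          export_results=True,     # writes a submission csv via importexport.exportcsv
          export_r_hat=False)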
Example no. 4
def option_selection_evaluation(type):
    if type == 'SIM':
        # LET USER CHOOSE OPTIONS
        log.success('STUDY HARD | WORK HARD | F**K HARD |')
        log.warning('\'s\' to save the r_hat in saved_r_hat_evaluation')
        log.warning('\'m\' to compute the MAP@10')
        option = input()[0]

        if option == 's':
            urm_filter_tracks = data.get_urm_train_1()
            rel_path = 'saved_r_hat_evaluation/'
            log.success('SELECT A NAME FOR THE MATRIX')
            name = input()
        elif option == 'm':
            urm_filter_tracks = data.get_urm_train_1()
            rel_path = None
            name = None
        else:
            log.warning('Invalid option, exiting...')
            exit(0)
        return name, urm_filter_tracks, rel_path
    elif type == 'R_HAT':
        # LET USER CHOOSE OPTIONS
        log.success('STUDY HARD | WORK HARD | F**K HARD |')
        log.warning('\'s\' to save the r_hat in saved_r_hat')
        log.warning('\'e\' to export and create a submission')
        option = input()[0]

        if option == 's':
            log.success('SELECT A NAME FOR THE MATRIX')
            name = input()
            urm_filter_tracks = data.get_urm()
            rel_path = 'saved_r_hat/'
            EXPORT = False
        elif option == 'e':
            log.success('SELECT A NAME FOR THE SUB')
            name = input()
            urm_filter_tracks = data.get_urm()
            rel_path = None
            EXPORT = True
        else:
            log.warning('Invalid option, exiting...')
            exit(0)
        return name, urm_filter_tracks, rel_path, EXPORT
Example no. 5
    def fit(self,
            URM,
            n_factors=10,
            learning_rate=1e-4,
            epochs=10,
            user_regularization=0.001,
            positive_item_regularization=0.001,
            negative_item_regularization=0.001,
            evaluate_every=1):
        self.URM = URM
        self.epochs = epochs
        self.n_users = self.URM.shape[0]
        self.n_items = self.URM.shape[1]

        e = MFBPR_Epoch(
            URM,
            n_factors=n_factors,
            learning_rate=learning_rate,
            user_regularization=user_regularization,
            positive_item_regularization=positive_item_regularization,
            negative_item_regularization=negative_item_regularization)
        print('Fitting MFBPR...')

        for numEpoch in range(self.epochs):
            print('Epoch:', numEpoch)
            e.epochIteration()
            if (numEpoch + 1) % evaluate_every == 0:
                self.user_factors, self.item_factors = e.get_user_item_factors(
                )
                recs = self.recommend_batch(userids=d.get_target_playlists())
                self.evaluate(recs, d.get_urm_test_1())

        self.user_factors, self.item_factors = e.get_user_item_factors()

        # let's see how well the model performs on the test set:
        # the positive sample is an item that appears in the test set but not in the training set
        trials = 10000
        count_wrong = 0
        test = d.get_urm_test_1()
        urm = d.get_urm()
        for _ in range(trials):
            user_id = np.random.choice(self.n_users)
            user_seen_items = urm[user_id, :].indices
            test_items = test[user_id, :].indices
            if len(test_items) == 0:
                # skip users without interactions in the test set
                continue
            pos_item_id = np.random.choice(test_items)
            neg_item_selected = False
            while not neg_item_selected:
                neg_item_id = np.random.randint(0, self.n_items)
                if neg_item_id not in user_seen_items:
                    neg_item_selected = True
            xui = np.dot(self.user_factors[user_id, :],
                         self.item_factors[pos_item_id, :])
            xuj = np.dot(self.user_factors[user_id, :],
                         self.item_factors[neg_item_id, :])
            xuij = xui - xuj
            if xuij < 0:
                count_wrong += 1
            # print('u: {}, i: {}, j: {}. xui - xuj: {}'.format(user_id, pos_item_id, neg_item_id, xuij))
        print('percentage of wrong preferences in the test set: {}'.format(
            count_wrong / trials))
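For intuition, the pairwise check above compares two dot products per sampled (user, positive, negative) triple; a tiny numeric example with invented factors:

import numpy as np

user_factors = np.array([0.2, -0.1, 0.5])    # latent vector of one user
pos_item = np.array([0.3, 0.0, 0.4])         # item the user interacted with in the test set
neg_item = np.array([-0.2, 0.1, 0.1])        # item the user never interacted with

xui = user_factors.dot(pos_item)             # score of the positive item
xuj = user_factors.dot(neg_item)             # score of the negative item
print(xui - xuj > 0)                         # True -> this pair is ranked correctly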
Example no. 6
    def recommend_batch(self,
                        userids,
                        N=10,
                        urm=None,
                        filter_already_liked=True,
                        with_scores=False,
                        items_to_exclude=[],
                        verbose=False):
        if not self._has_fit():
            return None

        R = data.get_urm() if urm is None else urm

        if userids is None or len(userids) == 0:
            print('Recommending for all users...')

        # compute the R^ by multiplying: R•S or S•R
        if self._matrix_mul_order == 'inverse':
            R_hat = sim.dot_product(self._sim_matrix,
                                    R,
                                    target_rows=userids,
                                    k=R.shape[0],
                                    format_output='csr',
                                    verbose=verbose)
        else:
            R_hat = sim.dot_product(R,
                                    self._sim_matrix,
                                    target_rows=userids,
                                    k=R.shape[0],
                                    format_output='csr',
                                    verbose=verbose)

        if filter_already_liked:
            # remove from the R^ the items already in the R
            R_hat[R.nonzero()] = -np.inf
        if len(items_to_exclude) > 0:
            # TODO: this exclusion filter is known to be broken and still has to be fixed and tested
            R_hat = R_hat.T
            R_hat[items_to_exclude] = -np.inf
            R_hat = R_hat.T

        # make recommendations only for the target rows
        if userids is not None and len(userids) > 0:
            R_hat = R_hat[userids]
        else:
            userids = list(range(R_hat.shape[0]))
        recommendations = self._extract_top_items(R_hat, N=N)
        return self._insert_userids_as_first_col(userids,
                                                 recommendations).tolist()
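For intuition on the two multiplication orders handled above: with an item-item similarity the estimate is R·S, with a user-user similarity it is S·R; a toy shape check (matrices invented):

import numpy as np

R = np.array([[1, 0, 1],
              [0, 1, 0]])          # 2 users x 3 items
S_item = np.eye(3)                 # item-item similarity -> R_hat = R @ S_item
S_user = np.eye(2)                 # user-user similarity -> R_hat = S_user @ R

print((R @ S_item).shape)          # (2, 3)
print((S_user @ R).shape)          # (2, 3)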
Example no. 7
    def run(self,
            normalize_similarity=False,
            add_zeros_quota=1,
            loss_tolerance=1e-6,
            iteration_limit=30,
            damp_coeff=1,
            use_incremental=False,
            export_results=True,
            export_r_hat=False,
            export_for_validation=False):
        if export_r_hat and export_for_validation:
            urm = d.get_urm_train_1()
        else:
            urm = d.get_urm()

        self.fit(ICM=d.get_icm(),
                 URM_train=urm,
                 normalize_similarity=normalize_similarity,
                 add_zeros_quota=add_zeros_quota,
                 loss_tolerance=loss_tolerance,
                 iteration_limit=iteration_limit,
                 damp_coeff=damp_coeff,
                 use_incremental=use_incremental)
        if export_results:
            print('exporting results')
            recs = self.recommend_batch(d.get_target_playlists(),
                                        N=10,
                                        urm=urm,
                                        filter_already_liked=True,
                                        with_scores=False,
                                        items_to_exclude=[],
                                        verbose=False)
            importexport.exportcsv(
                recs, 'submission',
                self._print(normalize_similarity=normalize_similarity,
                            add_zeros_quota=add_zeros_quota,
                            loss_tolerance=loss_tolerance,
                            iteration_limit=iteration_limit,
                            damp_coeff=damp_coeff,
                            use_incremental=use_incremental))
        elif export_r_hat:
            print('saving estimated urm')
            self.save_r_hat(export_for_validation)
Example no. 8
    def fit(self,
            urm_train=None,
            factors=550,
            regularization=0.15,
            iterations=300,
            alpha=25):
        """
        train the model finding the two matrices U and V: U*V.T=R  (R is the extimated URM)

        Parameters
        ----------
        :param (csr) urm_train: The URM matrix of shape (number_users, number_items).
        :param (int) factors: How many latent features we want to compute.
        :param (float) regularization: lambda_val regularization value
        :param (int) iterations: How many times we alternate between fixing and updating our user and item vectors
        :param (int) alpha: The rate in which we'll increase our confidence in a preference with more interactions.

        Returns
        -------
        :return (csr_matrix) user_vecs: matrix N_user x factors
        :return (csr_matrix) item_vecs: matrix N_item x factors

        """
        self.urm = urm_train
        sparse_item_user = self.urm.T

        # Initialize the als model and fit it using the sparse item-user matrix
        self._model = implicit.als.AlternatingLeastSquares(
            factors=factors,
            regularization=regularization,
            iterations=iterations)

        # calculate the confidence matrix by multiplying the interactions by our alpha value
        data_conf = (sparse_item_user * alpha).astype('double')

        # Fit the model
        self._model.fit(data_conf)

        # set the user and item vectors for our model R = user_vecs * item_vecs.T
        self.user_vecs = self._model.user_factors
        self.item_vecs = self._model.item_factors
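Once fit has run, the estimated scores of a user follow directly from R = user_vecs · item_vecs.T as noted in the comment above; a minimal sketch (the recommender class name is a stand-in):

import numpy as np

model = AlternatingLeastSquare()   # hypothetical name for the class defining fit() above
model.fit(factors=550, regularization=0.15, iterations=300, alpha=25)

user_id = 42                                               # arbitrary example user
scores = model.user_vecs[user_id].dot(model.item_vecs.T)   # one estimated row of R
top10 = np.argsort(-scores)[:10]                           # the 10 highest-scored item ids
print(top10)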
Example no. 9
    def test(self, distance=DistanceBasedRecommender.SIM_SPLUS, k=600, shrink=10, threshold=0, alpha=0.25, beta=0.5, l=0.5, c=0.25,
             export_results=True, export_r_hat=False):
        """
        meant as a shortcut to run the model after the validation procedure,
        allowing the export of the scores on the playlists or of the estimated csr matrix
        """
        recs, map = self.run(urm=d.get_urm(),
                             icm=d.get_icm(),
                             targetids=d.get_target_playlists(),
                             distance=distance,
                             k=k, shrink=shrink,
                             threshold=threshold,
                             alpha=alpha,
                             beta=beta,
                             l=l,
                             c=c,
                             export=export_results)

        if export_r_hat:
            print('saving estimated urm')
            self.save_r_hat()
        return recs, map
Example no. 10
    def run(self,
            urm_train=None,
            urm=None,
            urm_test=None,
            targetids=None,
            factors=100,
            regularization=0.01,
            iterations=100,
            alpha=25,
            with_scores=False,
            export=True,
            verbose=True):
        """
        Run the model and export the results to a file

        Returns
        -------
        :return: recs: (list) recommendations
        :return: map10: (float) MAP@10 for the provided recommendations
        """
        _urm_train = data.get_urm_train_1()
        _urm = data.get_urm()
        _icm = data.get_icm()
        _urm_test = data.get_urm_test_1()
        _targetids = data.get_target_playlists()
        # _targetids = data.get_all_playlists()

        start = time.time()

        urm_train = _urm_train if urm_train is None else urm_train
        urm = _urm if urm is None else urm
        urm_test = _urm_test if urm_test is None else urm_test
        targetids = _targetids if targetids is None else targetids

        self.fit(l1_ratio=0.1,
                 positive_only=True,
                 alpha=1e-4,
                 fit_intercept=False,
                 copy_X=False,
                 precompute=False,
                 selection='random',
                 max_iter=100,
                 topK=100,
                 tol=1e-4,
                 workers=multiprocessing.cpu_count())
        recs = self.recommend_batch(userids=targetids,
                                    with_scores=with_scores,
                                    verbose=verbose)

        map10 = None
        if len(recs) > 0:
            map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
        else:
            log.warning('No recommendations available, skip evaluation')

        if export:
            exportcsv(recs, path='submission', name=self.name, verbose=verbose)

        if verbose:
            log.info('Run in: {:.2f}s'.format(time.time() - start))

        return recs, map10
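For reference, the fit call above passes ElasticNet-style hyperparameters; below is a minimal sketch of the per-item regression that SLIM with ElasticNet typically solves for one column of the weight matrix W, using sklearn directly on random toy data (the standard SLIM formulation is assumed here, this is not this repository's implementation).

import numpy as np
from sklearn.linear_model import ElasticNet

n_users, n_items, target_item = 50, 20, 3
R = (np.random.rand(n_users, n_items) > 0.8).astype(float)   # toy implicit URM

X = R.copy()
y = R[:, target_item].copy()
X[:, target_item] = 0.0            # exclude the item itself to avoid the trivial solution

enet = ElasticNet(alpha=1e-4, l1_ratio=0.1, positive=True, fit_intercept=False,
                  copy_X=False, precompute=False, selection='random',
                  max_iter=100, tol=1e-4)
enet.fit(X, y)
w_col = enet.coef_                 # one column of the item-item weight matrix W
print(np.count_nonzero(w_col), 'non-zero weights for item', target_item)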
Example no. 11
    arg = input()[0]
    print()

    if arg == 't':
        model = P3alphaRecommender(data.get_urm_train_1())
        model.fit(topK=900,
                  alpha=1.2,
                  min_rating=0,
                  implicit=True,
                  normalize_similarity=False)
        recs = model.recommend_batch(data.get_target_playlists())
        evaluate(recs, test_urm=data.get_urm_test_1())
    elif arg == 'r':
        log.info('Save for evaluation (y/n)?')
        if input()[0] == 'y':
            # fit on the train split so the saved r_hat can be evaluated against the test split
            model = P3alphaRecommender(data.get_urm_train_1())
            path = 'raw_data/saved_r_hat_evaluation/'
        else:
            model = P3alphaRecommender(data.get_urm())
            path = 'raw_data/saved_r_hat/'

        model.fit(topK=500, alpha=1.7, min_rating=1, normalize_similarity=True)

        print('Saving the R^...')
        r_hat = sps.csr_matrix(
            np.dot(model.URM_train[data.get_target_playlists()],
                   model.W_sparse))
        sps.save_npz(path + model.RECOMMENDER_NAME, r_hat)
    elif arg == 's':
        model = P3alphaRecommender(data.get_urm())
        model.fit(topK=500, alpha=1.7, min_rating=1, normalize_similarity=True)
        print('Saving the similarity matrix...')
Example no. 12
from recommenders.collaborative_filtering.SLIM_RMSE import SLIMElasticNetRecommender
import data.data as d
import inout.importexport as io

urm = d.get_urm()
urm_train = d.get_urm_train()
target_id = d.get_all_playlists()
urm_test = d.get_urm_test()
t_id = d.get_target_playlists()


recommender = SLIMElasticNetRecommender(urm)
recommender.fit(topK=100, alpha=1e-4, l1_ratio=0.1, max_iter=100, tol=1e-4)
recommender.save_r_hat()
#recommendations = recommender.recommend_batch(userids=t_id)
#map10 = recommender.evaluate(recommendations, test_urm=urm_test)
#print('map@10: {}'.format(map10))
#io.exportcsv(recommendations, path='submissions', name='slim_rmse')


Example no. 13
                            for ui in use_incremental:
                                print(
                                    self._print(normalize_similarity=ns,
                                                add_zeros_quota=adq,
                                                loss_tolerance=lt,
                                                iteration_limit=il,
                                                damp_coeff=dc,
                                                use_incremental=ui))
                                self.fit(ICM=d.get_icm(),
                                         URM_train=d.get_urm_train_1(),
                                         normalize_similarity=ns,
                                         add_zeros_quota=adq,
                                         loss_tolerance=lt,
                                         iteration_limit=il,
                                         damp_coeff=dc,
                                         use_incremental=ui)

                                recs = self.recommend_batch(
                                    user_ids, urm=d.get_urm_train_1())
                                r.evaluate(recs, d.get_urm_test_1())
        if log_path is not None:
            sys.stdout = orig_stdout
            f.close()


#0.039
r = CFW()
r.fit(URM_train=data.get_urm())
sps.save_npz('raw_data/saved_sim_matrix/CFW', r.W_sparse)
# r.run(export_results=False, export_r_hat=True, export_for_validation=False)
Example no. 14
        MAP@k: (float) MAP for the provided recommendations
        """
        if at_k <= 0:
            log.error('Invalid value of k {}'.format(at_k))
            return

        aps = 0.0
        for r in recommendations:
            row = test_urm.getrow(r[0]).indices
            m = min(at_k, len(row))

            ap = 0.0
            n_elems_found = 0.0
            for j in range(1, m + 1):
                if r[j] in row:
                    n_elems_found += 1
                    ap = ap + n_elems_found / j
            if m > 0:
                ap = ap / m
                aps = aps + ap

        result = aps / len(recommendations)
        if verbose:
            log.warning('MAP: {}'.format(result))
        return result
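As a small sanity check of the loop above (toy numbers): with at_k = 10, a recommendation row r = [0, 5, 9, 3] and test items {3, 5} for user 0, only the first m = min(10, 2) = 2 recommended positions are scanned; the hit at position 1 gives AP = (1/1) / 2 = 0.5.

import scipy.sparse as sps

# toy test URM: user 0 holds items 3 and 5 in the test set
test_urm = sps.csr_matrix(([1, 1], ([0, 0], [3, 5])), shape=(1, 10))
r = [0, 5, 9, 3]                        # [userid, 1st, 2nd, 3rd recommended item]
row = test_urm.getrow(r[0]).indices     # relevant items for this user -> [3, 5]

at_k = 10
m = min(at_k, len(row))                 # 2: only the first two positions are scanned
ap, n_elems_found = 0.0, 0.0
for j in range(1, m + 1):
    if r[j] in row:
        n_elems_found += 1
        ap += n_elems_found / j
print(ap / m)                           # 0.5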

rec = RP3betaRecommender(data.get_urm())
rec.fit()
sps.save_npz('raw_data/saved_sim_matrix/RP3BETA', rec.W_sparse)
#recs = rec.recommend_batch(data.get_target_playlists())
#rec.evaluate(recs, test_urm=data.get_urm_test_1())