Esempio n. 1
0
    def run(self,
            num_factors,
            urm_train=None,
            urm=None,
            urm_test=None,
            targetids=None,
            with_scores=False,
            export=True,
            verbose=True):
        """
        Fit the model, recommend for the target users, evaluate and optionally export.

        Datasets are fetched from ``data`` only for the arguments left to None, so a
        caller that supplies its own matrices does not trigger any extra loading.

        Parameters
        ----------
        num_factors : int, number of latent factors
        urm_train : csr matrix, URM used to fit the model. If None, use: data.get_urm_train()
        urm : not used by this method; kept so existing callers passing it keep working
        urm_test : csr matrix, urm where to test the model. If None, use: data.get_urm_test()
        targetids : list, target user ids. If None, use: data.get_target_playlists()
        with_scores : not used by this method; kept so existing callers passing it keep working
        export : bool, if True the recommendations are written through exportcsv
        verbose : bool, forwarded to evaluate/exportcsv; if True the elapsed time is logged

        Returns
        -------
        recs: recommendations as returned by recommend_batch
        map10: value returned by evaluate, or None when no recommendation is available
        """
        if urm_train is None:
            urm_train = data.get_urm_train()
        if urm_test is None:
            urm_test = data.get_urm_test()
        if targetids is None:
            targetids = data.get_target_playlists()

        # The timer starts after the inputs are resolved, so it measures
        # fit + recommend + evaluate (+ export) only.
        start = time.time()

        self.fit(urm_train=urm_train, num_factors=num_factors)
        recs = self.recommend_batch(userids=targetids)

        map10 = None
        # len() rather than truthiness: recs may be an array-like container.
        if len(recs) > 0:
            map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
        else:
            log.warning('No recommendations available, skip evaluation')

        if export:
            exportcsv(recs, path='submission', name=self.name, verbose=verbose)

        if verbose:
            log.info('Run in: {:.2f}s'.format(time.time() - start))

        return recs, map10
Esempio n. 2
0
    def validate(self,
                 factors_array,
                 iteration_array,
                 urm_train=None,
                 urm_test=None,
                 verbose=True,
                 write_on_file=True,
                 userids=None,
                 N=10,
                 filter_already_liked=True,
                 items_to_exclude=None):
        """
        Grid-search Pure_SVD over the given factors and iterations.

        For every (factors, iteration) pair a Pure_SVD recommender is fitted on
        ``urm_train``, recommendations are produced for ``userids`` and evaluated
        against ``urm_test``. Results are appended to
        ``validation_results/<dd-mm-YYYY>/pure_SVD_<HH-MM-SS>.csv``.

        Parameters
        ----------
        factors_array : iterable, values of num_factors to try
        iteration_array : iterable, values of iteration to try
        urm_train : csr matrix, URM used for training. If None, use: data.get_urm_train()
        urm_test : csr matrix, URM used for evaluation. If None, use: data.get_urm_test()
        verbose : bool, if True print the progress and the score of every configuration
        write_on_file : bool, if True write every result to the output file. The file
            (and its folder) is created even when this is False.
        userids : list, users to recommend for. If None, use: data.get_target_playlists()
        N : int, forwarded to recommend_batch
        filter_already_liked : bool, forwarded to recommend_batch
        items_to_exclude : list, forwarded to recommend_batch. If None, an empty list is used
        """
        # Defaults are resolved at call time: evaluating data.get_*() in the
        # signature would load the datasets as soon as the class is defined.
        if urm_train is None:
            urm_train = data.get_urm_train()
        if urm_test is None:
            urm_test = data.get_urm_test()
        if userids is None:
            userids = data.get_target_playlists()
        if items_to_exclude is None:
            # Fresh list per call, never a shared mutable default.
            items_to_exclude = []

        #create the initial model
        recommender = Pure_SVD()

        path = 'validation_results/'
        name = 'pure_SVD'
        folder = time.strftime('%d-%m-%Y')
        filename = '{}/{}/{}{}.csv'.format(path, folder, name,
                                           time.strftime('_%H-%M-%S'))
        # create dir if not exists
        os.makedirs(os.path.dirname(filename), exist_ok=True)

        with open(filename, 'w') as out:
            for f in factors_array:
                for i in iteration_array:
                    #train the model with the parameters
                    if verbose:
                        print(
                            '\n\nTraining PURE_SVD with\n Factors: {}\n Iteration: {}\n'
                            .format(f, i))
                        print('\n training phase...')
                    recommender.fit(urm_train=urm_train,
                                    num_factors=f,
                                    iteration=i)

                    #get the recommendations from the trained model
                    recommendations = recommender.recommend_batch(
                        userids=userids,
                        N=N,
                        filter_already_liked=filter_already_liked,
                        items_to_exclude=items_to_exclude)
                    #evaluate the model with map10
                    map10 = recommender.evaluate(recommendations,
                                                 test_urm=urm_test)
                    if verbose:
                        print('map@10: {}'.format(map10))

                    #write on external files on folder models_validation
                    if write_on_file:
                        out.write(
                            '\n\nFactors: {}\n Iteration: {}\n evaluation map@10: {}'
                            .format(f, i, map10))
Esempio n. 3
0
    def validate(self, epochs=200, user_ids=None,
            batch_size = [1000], validate_every_N_epochs = 5, start_validation_after_N_epochs = 0, lambda_i = [0.0],
            lambda_j = [0.0], learning_rate = [0.01], topK = [200], sgd_mode='adagrad', log_path=None):
        """
        Grid-search the model: call fit once for every combination of the
        list-valued hyper-parameters.

        While running, everything printed to stdout is redirected to a log file
        when ``log_path`` is given; stdout is restored and the file closed even
        if a fit raises.

        :param epochs(int)
        :param batch_size(list) after how many items the params should be updated
        :param lambda_i(list) first regularization term
        :param lambda_j(list) second regularization term
        :param learning_rate(list) algorithm learning rate
        :param topK(list) how many elements should be taken into account while computing URM*W
        :param sgd_mode(string) optimization algorithm
        :param user_ids(list) needed if we'd like to perform validation.
               If None, d.get_target_playlists() is used
        :param validate_every_N_epochs(int) how often the MAP evaluation should be displayed
        :param start_validation_after_N_epochs(int)
        :param log_path(string) folder to which the validation results should be saved
        """
        from itertools import product

        # Resolved at call time: a d.get_*() default in the signature would be
        # evaluated once, when the class body is executed.
        if user_ids is None:
            user_ids = d.get_target_playlists()

        # The same matrices are used by every configuration: fetch them once.
        urm_train = d.get_urm_train()
        urm_test = d.get_urm_test()

        orig_stdout = sys.stdout
        log_file = None
        if log_path is not None:
            log_file = open(log_path + '/' + self.name + ' ' + time.strftime('_%H-%M-%S') + ' ' +
                            time.strftime('%d-%m-%Y') + '.txt', 'w')
            sys.stdout = log_file

        try:
            # product() iterates with the rightmost argument varying fastest,
            # i.e. the same order as nested loops over li, lj, tk, lr, b.
            for li, lj, tk, lr, b in product(lambda_i, lambda_j, topK,
                                             learning_rate, batch_size):
                print(self._print(epochs=epochs,
                                  batch_size=b,
                                  lambda_i=li,
                                  lambda_j=lj,
                                  learning_rate=lr,
                                  topK=tk,
                                  sgd_mode=sgd_mode))
                # Train this instance, not a module-level object.
                self.fit(URM_train=urm_train,
                         epochs=epochs,
                         URM_test=urm_test,
                         user_ids=user_ids,
                         batch_size=b,
                         validate_every_N_epochs=validate_every_N_epochs,
                         start_validation_after_N_epochs=start_validation_after_N_epochs,
                         lambda_i=li,
                         lambda_j=lj,
                         learning_rate=lr,
                         topK=tk,
                         sgd_mode=sgd_mode)
        finally:
            if log_file is not None:
                sys.stdout = orig_stdout
                log_file.close()
Esempio n. 4
0
    def fit(self, URM_train=None, epochs=30, URM_test=None, user_ids=None,
            batch_size = 1000, validate_every_N_epochs = 1, start_validation_after_N_epochs = 0, lambda_i = 0.0,
            lambda_j = 0.0, learning_rate = 0.01, topK = 200, sgd_mode='adagrad'):

        """
        train the model finding matrix W

        Builds the Cython epoch object on URM_train and then delegates the
        training loop to _fit_alreadyInitialized.

        :param epochs(int)
        :param batch_size(int) after how many items the params should be updated.
               Forwarded to _fit_alreadyInitialized; the Cython epoch object itself
               is always created with batch_size=1
        :param lambda_i(float) first regularization term
        :param lambda_j(float) second regularization term
        :param learning_rate(float) algorithm learning rate
        :param topK(int) how many elements should be taken into account while computing URM*W
        :param sgd_mode(string) optimization algorithm
        :param URM_train(csr_matrix) the URM used to train the model. Either the full or the validation one.
               If None, d.get_urm_train() is used
        :param URM_test(csr_matrix) needed if we'd like to perform validation.
               If None, d.get_urm_test() is used
        :param user_ids(list) needed if we'd like to perform validation.
               If None, d.get_target_playlists() is used
        :param validate_every_N_epochs(int) how often the MAP evaluation should be displayed
        :param start_validation_after_N_epochs(int)
        """

        # Dataset defaults are resolved at call time: d.get_*() calls in the
        # signature would run once, when the class body is executed.
        if URM_train is None:
            URM_train = d.get_urm_train()
        if URM_test is None:
            URM_test = d.get_urm_test()
        if user_ids is None:
            user_ids = d.get_target_playlists()

        self.URM_train = URM_train
        self.n_users = URM_train.shape[0]
        self.n_items = URM_train.shape[1]

        self.sgd_mode = sgd_mode

        # Imported here so the compiled extension is only required when fitting.
        from Cython.SLIM_BPR.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

        self.cythonEpoch = SLIM_BPR_Cython_Epoch(self.URM_train,
                                                 sparse_weights = False,
                                                 topK=topK,
                                                 learning_rate=learning_rate,
                                                 li_reg = lambda_i,
                                                 lj_reg = lambda_j,
                                                 batch_size=1,
                                                 symmetric = True,
                                                 sgd_mode = sgd_mode)

        # Hand over to the shared training loop now that the epoch object exists
        self._fit_alreadyInitialized(epochs=epochs,
                                    logFile=None,
                                    URM_test=URM_test,
                                    user_ids=user_ids,
                                    filterTopPop=False,
                                    minRatingsPerUser=1,
                                    batch_size=batch_size,
                                    validate_every_N_epochs=validate_every_N_epochs,
                                    start_validation_after_N_epochs=start_validation_after_N_epochs,
                                    lambda_i = lambda_i,
                                    lambda_j = lambda_j,
                                    learning_rate = learning_rate,
                                    topK = topK)
Esempio n. 5
0
        ranking = relevant_items_partition[relevant_items_partition_sorting]

        if with_scores:
            best_scores = scores[ranking]
            return [userid] + [list(zip(list(ranking), list(best_scores)))]
        else:
            return [userid] + list(ranking)

    def _filter_seen_on_scores(self, user_id, scores):

        seen = self.URM_train.indices[self.URM_train.indptr[user_id]:self.URM_train.indptr[user_id + 1]]

        scores[seen] = -np.inf
        return scores

    def get_r_hat(self, load_from_file=False, path=''):
        """
        Return the estimated ratings matrix.

        When ``load_from_file`` is true the matrix is read from ``path`` with
        load_npz; otherwise it is the product of the target-playlist rows of
        URM_train and W_sparse.
        """
        if not load_from_file:
            target_rows = self.URM_train[d.get_target_playlists()]
            return target_rows.dot(self.W_sparse)
        return load_npz(path)

# Ad-hoc training run. These statements are at module level, so they execute
# whenever this file is run or imported.

# Build the recommender from the training URM and train it for 100 epochs.
# NOTE(review): validate_every_N_epochs=101 is larger than epochs=100, which
# presumably disables the periodic validation - confirm against fit.
s = SLIM_BPR(d.get_urm_train())
s.fit(epochs=100, validate_every_N_epochs=101, learning_rate=1e-2,
      lambda_i = 1e-4, lambda_j = 1e-4)
# Evaluation and CSV export are currently disabled (`recs` is not defined here):
# s.evaluate(recs, d.get_urm_test(), print_result=True)
# importexport.exportcsv(recs, 'submission', 'SLIM_BPR')
s.save_r_hat(evaluation=True)

Esempio n. 6
0
    def validate_als(self,
                     factors_array,
                     regularization_array,
                     iterations_array,
                     alpha_val_array,
                     userids,
                     urm_train=None,
                     urm_test=None,
                     filter_already_liked=True,
                     items_to_exclude=None,
                     N=10,
                     verbose=True,
                     write_on_file=True):
        """
        Grid-search ALS over factors, regularization, iterations and alpha.

        For every combination the recommender is fitted, recommendations are
        produced for ``userids`` and evaluated against ``urm_test``. Results are
        written to ``validation_results/<dd-mm-YYYY>/als_<HH-MM-SS>.csv``.

        :param factors_array: values of the number of factors to try
        :param regularization_array: values of the regularization to try
        :param iterations_array: values of the number of iterations to try
        :param alpha_val_array: values of alpha to try
        :param userids: id of the users to take into account during evaluation
        :param urm_train: matrix on which train the model.
            If None, data.get_urm_train() is used
        :param urm_test: matrix in which test the model.
            If None, data.get_urm_test() is used
        :param filter_already_liked: forwarded to recommend_batch
        :param items_to_exclude: forwarded to recommend_batch.
            If None, an empty list is used
        :param N: forwarded to recommend_batch
        :param verbose: if True print the progress and the score of every configuration
        :param write_on_file: if True write every result to the output file. The file
            (and its folder) is created even when this is False.
        -----------
        :return: _
        """
        # Defaults are resolved at call time: evaluating data.get_*() in the
        # signature would load the datasets as soon as the class is defined.
        if urm_train is None:
            urm_train = data.get_urm_train()
        if urm_test is None:
            urm_test = data.get_urm_test()
        if items_to_exclude is None:
            # Fresh list per call, never a shared mutable default.
            items_to_exclude = []

        #create the initial model
        recommender = AlternatingLeastSquare(urm_train)

        path = 'validation_results/'
        name = 'als'
        folder = time.strftime('%d-%m-%Y')
        filename = '{}/{}/{}{}.csv'.format(path, folder, name,
                                           time.strftime('_%H-%M-%S'))
        # create dir if not exists
        os.makedirs(os.path.dirname(filename), exist_ok=True)

        with open(filename, 'w') as out:
            for f in factors_array:
                for r in regularization_array:
                    for i in iterations_array:
                        for a in alpha_val_array:

                            #train the model with the parameters
                            if verbose:
                                print(
                                    '\n\nTraining ALS with\n Factors: {}\n Regulatization: {}\n'
                                    'Iterations: {}\n Alpha_val: {}'.format(
                                        f, r, i, a))
                                print('\n training phase...')
                            recommender.fit(f, r, i, a)

                            #get the recommendations from the trained model
                            recommendations = recommender.recommend_batch(
                                userids=userids,
                                N=N,
                                filter_already_liked=filter_already_liked,
                                items_to_exclude=items_to_exclude)
                            #evaluate the model with map10
                            map10 = recommender.evaluate(recommendations,
                                                         test_urm=urm_test)
                            if verbose:
                                print('map@10: {}'.format(map10))

                            #write on external files on folder models_validation
                            if write_on_file:
                                out.write(
                                    '\n\nFactors: {}\n Regulatization: {}\n Iterations: {}\n '
                                    'Alpha_val: {}\n evaluation map@10: {}'.
                                    format(f, r, i, a, map10))
Esempio n. 7
0
from recommenders.collaborative_filtering.SLIM_RMSE import SLIMElasticNetRecommender
import data.data as d
import inout.importexport as io

# Load the datasets at module level.
# NOTE(review): only `urm` is used by the active code below. `urm_test` and
# `t_id` are referenced only by the commented-out evaluation lines, while
# `urm_train` and `target_id` are not referenced anywhere in this script.
urm = d.get_urm()
urm_train = d.get_urm_train()
target_id = d.get_all_playlists()
urm_test = d.get_urm_test()
t_id = d.get_target_playlists()


# Build the SLIM ElasticNet recommender on `urm`, fit it and call save_r_hat
# (presumably persisting the estimated ratings - confirm in the recommender).
recommender = SLIMElasticNetRecommender(urm)
recommender.fit(topK=100, alpha=1e-4, l1_ratio=0.1, max_iter=100, tol=1e-4)
recommender.save_r_hat()
# Disabled: recommend for the target playlists, evaluate MAP@10 and export a CSV.
#recommendations = recommender.recommend_batch(userids=t_id)
#map10 = recommender.evaluate(recommendations, test_urm=urm_test)
#print('map@10: {}'.format(map10))
#io.exportcsv(recommendations, path='submissions', name='slim_rmse')


Esempio n. 8
0
    def run(self,
            distance,
            urm_train=None,
            urm=None,
            urm_test=None,
            targetids=None,
            k=100,
            shrink=10,
            threshold=0,
            implicit=True,
            alpha=None,
            beta=None,
            l=None,
            c=None,
            with_scores=False,
            export=True,
            verbose=True):
        """
        Fit the model, recommend for the target users, evaluate and optionally export.

        Datasets are fetched from ``data`` only for the arguments left to None, so a
        caller that supplies its own matrices does not trigger any extra loading.

        Parameters
        ----------
        distance : str, distance metric
        urm_train : csr matrix, URM used to fit the model. If None, use: data.get_urm_train()
        urm : csr matrix, URM. If None, used: data.get_urm_train(). This should be the
            entire URM for which the targetids corresponds to the row indexes.
        urm_test : csr matrix, urm where to test the model. If None, use: data.get_urm_test()
        targetids : list, target user ids. If None, use: data.get_target_playlists()
        k : int, K nearest neighbour to consider
        shrink : float, shrink term used in the normalization
        threshold : float, all the values under this value are cut from the final result
        implicit : bool, if true, treat the URM as implicit, otherwise consider explicit ratings (real values) in the URM
        alpha, beta, l, c : forwarded unchanged to fit
        with_scores : bool, forwarded to recommend_batch
        export : bool, if True the recommendations are written through exportcsv
        verbose : bool, forwarded to recommend_batch/evaluate/exportcsv; if True the
            elapsed time is logged

        Returns
        -------
        recs: recommendations as returned by recommend_batch
        map10: value returned by evaluate, or None when no recommendation is available
        """
        # urm_train and urm share the same default: fetch it at most once.
        if urm_train is None or urm is None:
            default_urm = data.get_urm_train()
            if urm_train is None:
                urm_train = default_urm
            if urm is None:
                urm = default_urm
        if urm_test is None:
            urm_test = data.get_urm_test()
        if targetids is None:
            targetids = data.get_target_playlists()

        # The timer starts after the inputs are resolved, so it measures
        # fit + recommend + evaluate (+ export) only.
        start = time.time()

        self.fit(urm_train,
                 k=k,
                 distance=distance,
                 alpha=alpha,
                 beta=beta,
                 c=c,
                 l=l,
                 shrink=shrink,
                 threshold=threshold,
                 implicit=implicit)
        recs = self.recommend_batch(targetids,
                                    urm=urm,
                                    with_scores=with_scores,
                                    verbose=verbose)

        map10 = None
        # len() rather than truthiness: recs may be an array-like container.
        if len(recs) > 0:
            map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
        else:
            log.warning('No recommendations available, skip evaluation')

        if export:
            exportcsv(recs,
                      path='submission',
                      name='{}_{}'.format(self.name, distance),
                      verbose=verbose)

        if verbose:
            log.info('Run in: {:.2f}s'.format(time.time() - start))

        return recs, map10
Esempio n. 9
0
    def run(self,
            urm_train=None,
            urm=None,
            urm_test=None,
            targetids=None,
            factors=100,
            regularization=0.01,
            iterations=100,
            alpha=25,
            with_scores=False,
            export=True,
            verbose=True):
        """
        Fit the model, recommend for the target users, evaluate and optionally export.

        The model is always fitted with the fixed hyper-parameters written in the
        body below. ``urm_train``, ``urm``, ``factors``, ``regularization``,
        ``iterations`` and ``alpha`` are accepted so existing callers keep working
        but are not read, so no dataset is loaded on their behalf.

        Parameters
        ----------
        urm_train : not used by this method
        urm : not used by this method
        urm_test : csr matrix, urm where to test the model. If None, use: data.get_urm_test()
        targetids : list, target user ids. If None, use: data.get_target_playlists()
        factors, regularization, iterations, alpha : not used by this method
        with_scores : bool, forwarded to recommend_batch
        export : bool, if True the recommendations are written through exportcsv
        verbose : bool, forwarded to recommend_batch/evaluate/exportcsv; if True the
            elapsed time is logged

        Returns
        -------
        :return: recs: recommendations as returned by recommend_batch
        :return: map10: value returned by evaluate, or None when no recommendation is available
        """
        # Only the inputs that are actually consumed below are resolved.
        if urm_test is None:
            urm_test = data.get_urm_test()
        if targetids is None:
            targetids = data.get_target_playlists()

        # The timer starts after the inputs are resolved, so it measures
        # fit + recommend + evaluate (+ export) only.
        start = time.time()

        # NOTE(review): these values are hard-coded; in particular the `alpha`
        # argument of run() (default 25) is NOT the alpha passed here.
        self.fit(l1_ratio=0.1,
                 positive_only=True,
                 alpha=1e-4,
                 fit_intercept=False,
                 copy_X=False,
                 precompute=False,
                 selection='random',
                 max_iter=100,
                 topK=100,
                 tol=1e-4,
                 workers=multiprocessing.cpu_count())
        recs = self.recommend_batch(userids=targetids,
                                    with_scores=with_scores,
                                    verbose=verbose)

        map10 = None
        # len() rather than truthiness: recs may be an array-like container.
        if len(recs) > 0:
            map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
        else:
            log.warning('No recommendations available, skip evaluation')

        if export:
            exportcsv(recs, path='submission', name=self.name, verbose=verbose)

        if verbose:
            log.info('Run in: {:.2f}s'.format(time.time() - start))

        return recs, map10