Ejemplo n.º 1
0
    def __init__(self):
        """Precompute the two similarity matrices this model works with.

        An item-based collaborative filter and a content-based recommender
        are fitted with fixed hyper-parameters; each resulting similarity
        matrix is converted to CSR, divided by its own largest entry and
        stored on the instance (``S_matrix_target`` and
        ``S_matrix_contentKNN`` respectively).
        """
        self.name = "CFW_D_Similarity_Linalg"

        splus = DistanceBasedRecommender.SIM_SPLUS

        # Collaborative (item-based) similarity, fitted on the full URM.
        collaborative_model = CFItemBased()
        collaborative_sim = collaborative_model.fit(
            d.get_urm(),
            600,
            distance=splus,
            shrink=10,
            alpha=0.25,
            beta=0.5,
            l=0.25,
            c=0.5)
        collaborative_sim = collaborative_sim.tocsr()
        # Rescale by the largest entry; the original author reports a small
        # performance gain from this normalization.
        self.S_matrix_target = collaborative_sim / collaborative_sim.max()

        # Content-based similarity, fitted on the full URM plus the ICM.
        content_model = ContentBasedRecommender()
        content_sim = content_model.fit(
            d.get_urm(),
            d.get_icm(),
            k=500,
            distance=splus,
            shrink=500,
            alpha=0.75,
            beta=1,
            l=0.5,
            c=0.5)
        content_sim = content_sim.tocsr()
        # Same rescaling as for the collaborative matrix.
        self.S_matrix_contentKNN = content_sim / content_sim.max()
Ejemplo n.º 2
0
    def run(self,
            num_factors,
            urm_train=None,
            urm=None,
            urm_test=None,
            targetids=None,
            with_scores=False,
            export=True,
            verbose=True):
        """
        Fit the model, recommend for the target users, evaluate and
        optionally export the results to a file

        Parameters
        ----------
        num_factors : int, number of latent factors
        urm_train : csr matrix, URM used to fit the model. If None, use: data.get_urm_train()
        urm : accepted for signature compatibility; not used by this method
        urm_test : csr matrix, urm where to test the model. If None, use: data.get_urm_test()
        targetids : list, target user ids. If None, use: data.get_target_playlists()
        with_scores : accepted for signature compatibility; not used by this method
        export : bool, if True the recommendations are passed to exportcsv
            (path 'submission', file name taken from self.name)
        verbose : bool, forwarded to evaluate/exportcsv; if True the elapsed
            time is also logged

        Returns
        -------
        recs: (list) recommendations
        map10: (float) MAP10 for the provided recommendations, or None when
            no recommendation was produced
        """
        # Load a default dataset only when the caller did not supply it, so
        # explicit arguments never trigger an unnecessary load.
        if urm_train is None:
            urm_train = data.get_urm_train()
        if urm_test is None:
            urm_test = data.get_urm_test()
        if targetids is None:
            targetids = data.get_target_playlists()

        # The timer starts after the defaults are resolved, so it covers
        # fit/recommend/evaluate/export only.
        start = time.time()

        self.fit(urm_train=urm_train, num_factors=num_factors)
        recs = self.recommend_batch(userids=targetids)

        map10 = None
        if len(recs) > 0:
            map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
        else:
            log.warning('No recommendations available, skip evaluation')

        if export:
            exportcsv(recs, path='submission', name=self.name, verbose=verbose)

        if verbose:
            log.info('Run in: {:.2f}s'.format(time.time() - start))

        return recs, map10
Ejemplo n.º 3
0
    def validate(self,
                 user_ids=d.get_target_playlists(),
                 log_path=None,
                 normalize_similarity=[False],
                 damp_coeff=[1],
                 add_zeros_quota=[1],
                 loss_tolerance=[1e-6],
                 iteration_limit=[30],
                 use_incremental=[False]):
        """
        Grid-search the fit hyper-parameters and print one evaluation per
        combination.

        For every combination of the supplied value lists the method prints
        the description returned by self._print, fits the model on
        d.get_icm() / d.get_urm_train_1(), recommends for user_ids and
        evaluates the recommendations against d.get_urm_test_1().

        Parameters
        ----------
        user_ids : ids to recommend for. The default is computed once, when
            the method is defined.
        log_path : str or None. If given, everything printed during the
            search is written to a timestamped '.txt' file inside this
            directory instead of the current stdout.
        normalize_similarity, damp_coeff, add_zeros_quota, loss_tolerance,
        iteration_limit, use_incremental : iterables with the values to try
            for the fit parameter of the same name. The list defaults are
            only read, never mutated.
        """
        from itertools import product

        log_file = None
        orig_stdout = sys.stdout
        if log_path is not None:
            log_file = open(
                log_path + '/' + self.name + ' ' + time.strftime('_%H-%M-%S') +
                ' ' + time.strftime('%d-%m-%Y') + '.txt', 'w')
            sys.stdout = log_file

        try:
            # product() yields the combinations in the same order as the
            # equivalent nested loops (the last iterable varies fastest).
            for ns, dc, adq, lt, il, ui in product(normalize_similarity,
                                                   damp_coeff,
                                                   add_zeros_quota,
                                                   loss_tolerance,
                                                   iteration_limit,
                                                   use_incremental):
                # The header must report the add_zeros_quota actually used
                # for the fit (adq), not the damping coefficient.
                print(
                    self._print(normalize_similarity=ns,
                                add_zeros_quota=adq,
                                loss_tolerance=lt,
                                iteration_limit=il,
                                damp_coeff=dc,
                                use_incremental=ui))
                self.fit(ICM=d.get_icm(),
                         URM_train=d.get_urm_train_1(),
                         normalize_similarity=ns,
                         add_zeros_quota=adq,
                         loss_tolerance=lt,
                         iteration_limit=il,
                         damp_coeff=dc,
                         use_incremental=ui)

                recs = self.recommend_batch(
                    user_ids, urm=d.get_urm_train_1())
                # NOTE(review): `r` is resolved from the enclosing module,
                # not from self - confirm it is the intended evaluator.
                r.evaluate(recs, d.get_urm_test_1())
        finally:
            # Always restore stdout and close the log, even when a fit or
            # evaluation raises midway through the grid.
            if log_file is not None:
                sys.stdout = orig_stdout
                log_file.close()
Ejemplo n.º 4
0
    def run(self,
            normalize_similarity=False,
            add_zeros_quota=1,
            loss_tolerance=1e-6,
            iteration_limit=30,
            damp_coeff=1,
            use_incremental=False,
            export_results=True,
            export_r_hat=False,
            export_for_validation=False):
        """
        Fit the model and export either the recommendations or the
        estimated URM.

        The model is fitted on d.get_urm_train_1() only when both
        export_r_hat and export_for_validation are true; otherwise it is
        fitted on d.get_urm(). When export_results is true the top-10
        recommendations for the target playlists are written through
        importexport.exportcsv into 'submission', named after the
        hyper-parameter description from self._print. Only when
        export_results is false and export_r_hat is true is the estimated
        URM saved through self.save_r_hat.
        """
        fit_on_validation_split = export_r_hat and export_for_validation
        urm = d.get_urm_train_1() if fit_on_validation_split else d.get_urm()

        # Hyper-parameters shared by the fit call and the export file name.
        hyper_params = dict(normalize_similarity=normalize_similarity,
                            add_zeros_quota=add_zeros_quota,
                            loss_tolerance=loss_tolerance,
                            iteration_limit=iteration_limit,
                            damp_coeff=damp_coeff,
                            use_incremental=use_incremental)

        self.fit(ICM=d.get_icm(), URM_train=urm, **hyper_params)

        if export_results:
            print('exporting results')
            target_playlists = d.get_target_playlists()
            recs = self.recommend_batch(target_playlists,
                                        N=10,
                                        urm=urm,
                                        filter_already_liked=True,
                                        with_scores=False,
                                        items_to_exclude=[],
                                        verbose=False)
            export_name = self._print(**hyper_params)
            importexport.exportcsv(recs, 'submission', export_name)
        elif export_r_hat:
            print('saving estimated urm')
            self.save_r_hat(export_for_validation)
Ejemplo n.º 5
0
    def test(self, distance=DistanceBasedRecommender.SIM_SPLUS, k=600, shrink=10, threshold=0, alpha=0.25, beta=0.5, l=0.5, c=0.25,
             export_results=True, export_r_hat=False):
        """
        meant as a shortcut to run the model after the validation procedure,
        allowing the export of the scores on the playlists or of the estimated csr matrix

        Parameters
        ----------
        distance, k, shrink, threshold, alpha, beta, l, c : forwarded
            unchanged to self.run
        export_results : bool, forwarded to self.run as `export`
        export_r_hat : bool, if True self.save_r_hat() is called after the run

        Returns
        -------
        recs, map10 : the pair returned by self.run
        """
        # export_results / export_r_hat were read in the body without being
        # parameters or locals; they are now explicit keyword arguments with
        # defaults so existing calls keep working.
        recs, map10 = self.run(urm=d.get_urm(),
                               icm=d.get_icm(),
                               targetids=d.get_target_playlists(),
                               distance=distance,
                               k=k, shrink=shrink,
                               threshold=threshold,
                               alpha=alpha,
                               beta=beta,
                               l=l,
                               c=c,
                               export=export_results)

        if export_r_hat:
            print('saving estimated urm')
            self.save_r_hat()
        return recs, map10
Ejemplo n.º 6
0
    def fit(self,
            ICM=data.get_icm(),
            URM_train=data.get_urm_train_1(),
            normalize_similarity=True,
            add_zeros_quota=0.1,
            loss_tolerance=0.0001,
            iteration_limit=100,
            damp_coeff=0.1,
            use_incremental=True):
        """
        Solve a damped least-squares problem with linalg.lsqr and build the
        sparse weight matrix from its solution.

        Parameters
        ----------
        ICM, URM_train : matrices stored on the instance. Their defaults are
            evaluated once, when the method is defined.
        normalize_similarity, add_zeros_quota, use_incremental : stored on
            the instance before the helper methods are called.
        loss_tolerance : passed to lsqr as both `atol` and `btol`
        iteration_limit : passed to lsqr as `iter_lim`
        damp_coeff : passed to lsqr as `damp`
        """
        # Instance state must be set before the helper calls below.
        self.URM_train = URM_train
        self.ICM = ICM
        train_shape = self.URM_train.shape
        self.n_items = train_shape[1]
        self.n_users = train_shape[0]
        self.n_features = self.ICM.shape[1]
        self.normalize_similarity = normalize_similarity
        self.add_zeros_quota = add_zeros_quota
        self.use_incremental = use_incremental

        # Expected to populate self.row_list, self.col_list and
        # self.data_list, which are read below.
        self._generateTrainData_low_ram()

        # Element-wise product of the ICM rows selected by row_list and by
        # col_list.
        left_rows = self.ICM[self.row_list]
        right_rows = self.ICM[self.col_list]
        common_features = left_rows.multiply(right_rows)

        lsqr_output = linalg.lsqr(common_features,
                                  self.data_list,
                                  show=False,
                                  atol=loss_tolerance,
                                  btol=loss_tolerance,
                                  iter_lim=iteration_limit,
                                  damp=damp_coeff)
        solution = lsqr_output[0]

        # Two independent copies of the solution vector.
        self.D_incremental = solution.copy()
        self.D_best = solution.copy()
        self.epochs_best = 0
        # Fourth element of the lsqr result (r1norm in scipy.sparse.linalg,
        # assuming that is what `linalg` refers to).
        self.loss = lsqr_output[3]

        self._compute_W_sparse()
Ejemplo n.º 7
0
    def run(self,
            urm_train=None,
            urm=None,
            urm_test=None,
            targetids=None,
            factors=100,
            regularization=0.01,
            iterations=100,
            alpha=25,
            with_scores=False,
            export=True,
            verbose=True):
        """
        Run the model and export the results to a file

        Parameters
        ----------
        urm_train, urm : accepted for signature compatibility; not used by
            this method (self.fit is called without a URM argument)
        urm_test : urm where to test the model. If None, use: data.get_urm_test_1()
        targetids : target user ids. If None, use: data.get_target_playlists()
        factors, regularization, iterations, alpha : currently ignored,
            self.fit is invoked with the fixed hyper-parameters written below
        with_scores : bool, forwarded to recommend_batch
        export : bool, if True the recommendations are passed to exportcsv
            (path 'submission', file name taken from self.name)
        verbose : bool, forwarded to recommend_batch/evaluate/exportcsv; if
            True the elapsed time is also logged

        Returns
        -------
        :return: recs: (list) recommendations
        :return: map10: (float) MAP10 for the provided recommendations, or
            None when no recommendation was produced
        """
        # Only the datasets this method actually consumes are loaded, and
        # only when the caller did not supply them.
        if urm_test is None:
            urm_test = data.get_urm_test_1()
        if targetids is None:
            targetids = data.get_target_playlists()

        # The timer starts after the defaults are resolved.
        start = time.time()

        self.fit(l1_ratio=0.1,
                 positive_only=True,
                 alpha=1e-4,
                 fit_intercept=False,
                 copy_X=False,
                 precompute=False,
                 selection='random',
                 max_iter=100,
                 topK=100,
                 tol=1e-4,
                 workers=multiprocessing.cpu_count())
        recs = self.recommend_batch(userids=targetids,
                                    with_scores=with_scores,
                                    verbose=verbose)

        map10 = None
        if len(recs) > 0:
            map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
        else:
            log.warning('No recommendations available, skip evaluation')

        if export:
            exportcsv(recs, path='submission', name=self.name, verbose=verbose)

        if verbose:
            log.info('Run in: {:.2f}s'.format(time.time() - start))

        return recs, map10
Ejemplo n.º 8
0
    log.warning('(r) Save the R^')
    log.warning('(s) Save the similarity matrix')
    #log.warning('(v) Validate the model')
    log.warning('(x) Exit')
    # Slice instead of indexing: an empty line yields '' and falls through
    # to the 'Wrong option!' branch rather than raising IndexError.
    arg = input()[:1]
    print()
    
    model = ContentBasedRecommender()
    # Hyper-parameters shared by every menu action below.
    best_params = dict(distance=model.SIM_SPLUS,
                       k=500,
                       shrink=500,
                       alpha=0.75,
                       beta=1,
                       l=0.5,
                       c=0.5)
    if arg == 't':
        # recs = model.recommend_batch(userids=data.get_target_playlists(), urm=data.get_urm_train())
        # model.evaluate(recommendations=recs, test_urm=data.get_urm_test())
        model.test(**best_params)
    elif arg == 'r':
        log.info('Wanna save for evaluation (y/n)?')
        # Same empty-input guard as above; anything but 'y...' means "no".
        choice = input()[:1] == 'y'
        model.fit(urm=data.get_urm_train_2(), icm=data.get_icm(), **best_params)
        print('Saving the R^...')
        model.save_r_hat(evaluation=choice)
    elif arg == 's':
        model.fit(urm=data.get_urm_train_2(), icm=data.get_icm(), **best_params)
        print('Saving the similarity matrix...')
        sps.save_npz('raw_data/saved_sim_matrix_evaluation_2/{}'.format(model.name), model.get_sim_matrix())
    # elif arg == 'v':
    #     model.validate(....)
    elif arg == 'x':
        pass
    else:
        log.error('Wrong option!')

    # recs = model.recommend_batch(userids=data.get_target_playlists(), urm=data.get_urm_train())
    # recs_seq = model.recommend_batch(userids=data.get_sequential_target_playlists(), urm=data.get_urm_train())
Ejemplo n.º 9
0
    def run(self,
            distance,
            urm_train=None,
            urm=None,
            urm_test=None,
            targetids=None,
            k=100,
            shrink=10,
            threshold=0,
            implicit=True,
            alpha=None,
            beta=None,
            l=None,
            c=None,
            with_scores=False,
            export=True,
            verbose=True):
        """
        Run the model and export the results to a file

        Parameters
        ----------
        distance : str, distance metric
        urm_train : csr matrix, URM used to fit the model. If None, use: data.get_urm_train()
        urm : csr matrix, URM. If None, used: data.get_urm_train(). This should be the
            entire URM for which the targetids corresponds to the row indexes.
        urm_test : csr matrix, urm where to test the model. If None, use: data.get_urm_test()
        targetids : list, target user ids. If None, use: data.get_target_playlists()
        k : int, K nearest neighbour to consider
        shrink : float, shrink term used in the normalization
        threshold : float, all the values under this value are cut from the final result
        implicit : bool, if true, treat the URM as implicit, otherwise consider explicit ratings (real values) in the URM
        alpha, beta, l, c : forwarded unchanged to self.fit
        with_scores : bool, forwarded to recommend_batch
        export : bool, if True the recommendations are passed to exportcsv
            (path 'submission', file name '<self.name>_<distance>')
        verbose : bool, forwarded to recommend_batch/evaluate/exportcsv; if
            True the elapsed time is also logged

        Returns
        -------
        recs: (list) recommendations
        map10: (float) MAP10 for the provided recommendations, or None when
            no recommendation was produced
        """
        # Load each default only when the caller did not supply it. The
        # train URM is fetched once and shared by urm_train and urm.
        if urm_train is None or urm is None:
            default_urm = data.get_urm_train()
            if urm_train is None:
                urm_train = default_urm
            if urm is None:
                urm = default_urm
        if urm_test is None:
            urm_test = data.get_urm_test()
        if targetids is None:
            targetids = data.get_target_playlists()

        # The timer starts after the defaults are resolved.
        start = time.time()

        self.fit(urm_train,
                 k=k,
                 distance=distance,
                 alpha=alpha,
                 beta=beta,
                 c=c,
                 l=l,
                 shrink=shrink,
                 threshold=threshold,
                 implicit=implicit)
        recs = self.recommend_batch(targetids,
                                    urm=urm,
                                    with_scores=with_scores,
                                    verbose=verbose)

        map10 = None
        if len(recs) > 0:
            map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
        else:
            log.warning('No recommendations available, skip evaluation')

        if export:
            exportcsv(recs,
                      path='submission',
                      name='{}_{}'.format(self.name, distance),
                      verbose=verbose)

        if verbose:
            log.info('Run in: {:.2f}s'.format(time.time() - start))

        return recs, map10
Ejemplo n.º 10
0
            raise ValueError(
                "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
                .format(self.FEATURE_WEIGHTING_VALUES, feature_weighting))

        if feature_weighting == "BM25":
            self.ICM = self.ICM.astype(np.float32)
            self.ICM = okapi_BM_25(self.ICM)

        elif feature_weighting == "TF-IDF":
            self.ICM = self.ICM.astype(np.float32)
            self.ICM = TF_IDF(self.ICM)

        similarity = Compute_Similarity(self.ICM.T,
                                        shrink=shrink,
                                        topK=topK,
                                        normalize=normalize,
                                        similarity=similarity,
                                        **similarity_args)

        if self.sparse_weights:
            self.W_sparse = similarity.compute_similarity()
        else:
            self.W = similarity.compute_similarity()
            self.W = self.W.toarray()


# Script-style usage executed at module level: build the recommender from the
# ICM and the `train_1` URM supplied by `data`, fit it with TF-IDF feature
# weighting, recommend for the target playlists and evaluate the result
# against the matching `test_1` URM.
rec = ItemKNNCBFRecommender(ICM=data.get_icm(),
                            URM_train=data.get_urm_train_1())
rec.fit(feature_weighting='TF-IDF')
# NOTE(review): the meaning of type='ITEM' is defined by recommend_batch,
# which is not visible here - confirm against its implementation.
recs = rec.recommend_batch(userids=data.get_target_playlists(), type='ITEM')
rec.evaluate(recs, test_urm=data.get_urm_test_1())