Esempio n. 1
0
    def run(self,
            epochs=70,
            batch_size=1000,
            lambda_i=0.0,
            lambda_j=0.0,
            learning_rate=0.01,
            topK=1500,
            sgd_mode='adagrad',
            export_results=True,
            export_r_hat=False):
        """
        Shortcut to train the model with already-chosen hyperparameters and
        export either the recommendations or the estimated matrix.

        The model is fitted on ``d.get_urm()``. Exactly one export can happen:
        ``export_r_hat`` is honoured only when ``export_results`` is False.

        :param epochs(int) number of training epochs
        :param batch_size(int) after how many items the params should be updated
        :param lambda_i(float) first regularization term
        :param lambda_j(float) second regularization term
        :param learning_rate(float) algorithm learning rate
        :param topK(int) how many elements should be taken into account while computing URM*W
        :param sgd_mode(string) optimization algorithm
        :param export_results(bool) export a csv with 10 predicted songs for each target playlist
        :param export_r_hat(bool) save the estimated matrix through self.save_r_hat()
        """
        # The same hyperparameters feed both the training call and the
        # self._print(...) call used when exporting, so keep them in one place.
        hyperparams = dict(epochs=epochs,
                           batch_size=batch_size,
                           lambda_i=lambda_i,
                           lambda_j=lambda_j,
                           learning_rate=learning_rate,
                           topK=topK,
                           sgd_mode=sgd_mode)

        # No test matrix is supplied and validation is scheduled to start only
        # after epoch `epochs + 1` (presumably so it never triggers -- confirm
        # against fit()).
        self.fit(URM_train=d.get_urm(),
                 URM_test=None,
                 user_ids=None,
                 validate_every_N_epochs=1,
                 start_validation_after_N_epochs=epochs + 1,
                 **hyperparams)

        if not export_results:
            if export_r_hat:
                print('saving estimated urm')
                self.save_r_hat()
            return

        print('exporting results')
        playlist_recs = self.recommend_batch(d.get_target_playlists(),
                                             N=10,
                                             urm=d.get_urm(),
                                             filter_already_liked=True,
                                             with_scores=False,
                                             items_to_exclude=[],
                                             verbose=False)
        # The value returned by self._print(...) is handed to exportcsv as its
        # third positional argument (presumably the file name -- confirm).
        importexport.exportcsv(playlist_recs, 'submission',
                               self._print(**hyperparams))
Esempio n. 2
0
    def run(self,
            num_factors,
            urm_train=None,
            urm=None,
            urm_test=None,
            targetids=None,
            with_scores=False,
            export=True,
            verbose=True):
        """
        Fit the model, recommend for the target users, evaluate the
        recommendations and optionally export them to a file

        Parameters
        ----------
        num_factors : int, number of latent factors
        urm_train : csr matrix, URM used to fit the model. If None, use: data.get_urm_train()
        urm : accepted for backward compatibility, not used by this method
        urm_test : csr matrix, urm where to test the model. If None, use: data.get_urm_test()
        targetids : list, target user ids. If None, use: data.get_target_playlists()
        with_scores : accepted for backward compatibility, not used by this method
        export : bool, if True export the recommendations with exportcsv under
            the 'submission' path, named after self.name
        verbose : bool, forwarded to evaluate/exportcsv; if True also log the elapsed time

        Returns
        -------
        recs: (list) recommendations
        map10: (float) MAP10 for the provided recommendations, None when there
            are no recommendations to evaluate
        """
        # Load a default dataset only when the caller did not supply one, so
        # explicit arguments never pay for unused loads.
        if urm_train is None:
            urm_train = data.get_urm_train()
        if urm_test is None:
            urm_test = data.get_urm_test()
        if targetids is None:
            targetids = data.get_target_playlists()

        # Timing starts after the inputs are resolved, as it did originally.
        start = time.time()

        self.fit(urm_train=urm_train, num_factors=num_factors)
        recs = self.recommend_batch(userids=targetids)

        map10 = None
        if len(recs) > 0:
            map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
        else:
            log.warning('No recommendations available, skip evaluation')

        if export:
            exportcsv(recs, path='submission', name=self.name, verbose=verbose)

        if verbose:
            log.info('Run in: {:.2f}s'.format(time.time() - start))

        return recs, map10
Esempio n. 3
0
    def run(self, distance, urm=None, icm=None, urm_test=None, targetids=None, k=100, shrink=10, threshold=0,
        alpha=None, beta=None, l=None, c=None, with_scores=False, export=True, verbose=True):
        """
        Fit the model, recommend 10 items per target user, evaluate the
        recommendations and optionally export them to a file

        NOTE: the fallbacks to the data.* defaults are currently disabled in
        this method: urm, icm, urm_test and targetids are forwarded exactly as
        received, None included.

        Parameters
        ----------
        distance : str, distance metric
        urm : csr matrix, URM passed to fit and to recommend_batch
        icm : csr matrix, ICM passed to fit
        urm_test : csr matrix, urm where to test the model
        targetids : list, target user ids
        k : int, K nearest neighbour to consider
        shrink : float, shrink term used in the normalization
        threshold : float, all the values under this value are cut from the final result
        alpha, beta, l, c : extra parameters forwarded unchanged to fit
        with_scores : accepted but not used by this method
        export : bool, if True export the recommendations with exportcsv,
            named '<self.name>_<distance>'
        verbose : bool, forwarded to the model calls; if True also log the elapsed time

        Returns
        -------
        recs: (list) recommendations
        map10: (float) MAP10 for the provided recommendations, None when there
            are no recommendations to evaluate
        """
        started_at = time.time()

        self._print(distance, k, shrink, threshold, alpha, beta, l, c)

        # Everything fit() needs besides the two matrices.
        similarity_params = dict(k=k, distance=distance, shrink=shrink, threshold=threshold,
                                 alpha=alpha, beta=beta, l=l, c=c)
        self.fit(urm, icm=icm, **similarity_params)
        recommendations = self.recommend_batch(userids=targetids, urm=urm, N=10, verbose=verbose)

        if len(recommendations) == 0:
            log.warning('No recommendations available, skip evaluation')
            score = None
        else:
            score = self.evaluate(recommendations, test_urm=urm_test, verbose=verbose)

        if export:
            submission_name = '{}_{}'.format(self.name, distance)
            exportcsv(recommendations, path='submission', name=submission_name, verbose=verbose)

        if verbose:
            elapsed = time.time() - started_at
            log.info('Run in: {:.2f}s'.format(elapsed))

        return recommendations, score
Esempio n. 4
0
    def run(self, distance, ucm_train=None, urm=None, urm_test=None, targetids=None, k=100, shrink=10, threshold=0,
            implicit=True, alpha=None, beta=None, l=None, c=None, with_scores=False, export=True, verbose=True):
        """
        Fit the model, recommend for the target users, evaluate the
        recommendations and optionally export them to a file

        Parameters
        ----------
        distance : str, distance metric
        ucm_train : matrix passed to fit. If None, use: data.get_ucm_train()
        urm : matrix passed to recommend_batch. If None, use: data.get_urm_train_1()
        urm_test : urm where to test the model. If None, use: data.get_urm_test_1()
        targetids : list, target user ids. If None, use: data.get_target_playlists()
        k : int, K nearest neighbour to consider
        shrink : float, shrink term used in the normalization
        threshold : float, all the values under this value are cut from the final result
        implicit : bool, if true, treat the URM as implicit, otherwise consider explicit ratings (real values) in the URM
        alpha, beta, l, c : extra parameters forwarded unchanged to fit
        with_scores : bool, forwarded to recommend_batch
        export : bool, if True export the recommendations with exportcsv,
            named '<self.name>_<distance>'
        verbose : bool, forwarded to the model calls; if True also log the elapsed time

        Returns
        -------
        recs: (list) recommendations
        map10: (float) MAP10 for the provided recommendations, None when there
            are no recommendations to evaluate
        """
        start = time.time()

        # Load a default dataset only when the caller did not supply one, so
        # explicit arguments never pay for unused loads.
        if ucm_train is None:
            ucm_train = data.get_ucm_train()
        if urm is None:
            urm = data.get_urm_train_1()
        if urm_test is None:
            urm_test = data.get_urm_test_1()
        if targetids is None:
            targetids = data.get_target_playlists()

        self.fit(ucm_train, k=k, distance=distance, alpha=alpha, beta=beta, c=c, l=l,
                 shrink=shrink, threshold=threshold, implicit=implicit)
        recs = self.recommend_batch(targetids, urm=urm, with_scores=with_scores, verbose=verbose)

        map10 = None
        if len(recs) > 0:
            map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
        else:
            log.warning('No recommendations available, skip evaluation')

        if export:
            exportcsv(recs, path='submission', name='{}_{}'.format(self.name, distance), verbose=verbose)

        if verbose:
            log.info('Run in: {:.2f}s'.format(time.time() - start))

        return recs, map10
Esempio n. 5
0
    def run(self,
            normalize_similarity=False,
            add_zeros_quota=1,
            loss_tolerance=1e-6,
            iteration_limit=30,
            damp_coeff=1,
            use_incremental=False,
            export_results=True,
            export_r_hat=False,
            export_for_validation=False):
        """
        Shortcut to train the model with already-chosen hyperparameters and
        export either the recommendations or the estimated matrix.

        The model is fitted on ``d.get_urm_train_1()`` when both
        ``export_r_hat`` and ``export_for_validation`` are set, otherwise on
        ``d.get_urm()``. Exactly one export can happen: ``export_r_hat`` is
        honoured only when ``export_results`` is False.

        :param normalize_similarity(bool) forwarded to fit
        :param add_zeros_quota forwarded to fit
        :param loss_tolerance(float) forwarded to fit
        :param iteration_limit(int) forwarded to fit
        :param damp_coeff forwarded to fit
        :param use_incremental(bool) forwarded to fit
        :param export_results(bool) export a csv with 10 predicted songs for each target playlist
        :param export_r_hat(bool) save the estimated matrix through self.save_r_hat(...)
        :param export_for_validation(bool) selects the training matrix (see above) and is passed to save_r_hat
        """
        train_on_split = export_r_hat and export_for_validation
        urm = d.get_urm_train_1() if train_on_split else d.get_urm()

        # The same hyperparameters feed both the training call and the
        # self._print(...) call used when exporting, so keep them in one place.
        hyperparams = dict(normalize_similarity=normalize_similarity,
                           add_zeros_quota=add_zeros_quota,
                           loss_tolerance=loss_tolerance,
                           iteration_limit=iteration_limit,
                           damp_coeff=damp_coeff,
                           use_incremental=use_incremental)

        self.fit(ICM=d.get_icm(), URM_train=urm, **hyperparams)

        if not export_results:
            if export_r_hat:
                print('saving estimated urm')
                self.save_r_hat(export_for_validation)
            return

        print('exporting results')
        playlist_recs = self.recommend_batch(d.get_target_playlists(),
                                             N=10,
                                             urm=urm,
                                             filter_already_liked=True,
                                             with_scores=False,
                                             items_to_exclude=[],
                                             verbose=False)
        # The value returned by self._print(...) is handed to exportcsv as its
        # third positional argument (presumably the file name -- confirm).
        importexport.exportcsv(playlist_recs, 'submission',
                               self._print(**hyperparams))
Esempio n. 6
0
    def run(self, distance, h, k=100, shrink=10, threshold=0,
            alpha=None, beta=None, l=None, c=None, export=True, verbose=True):
        """
        Fit the model, recommend 10 items per user, evaluate the
        recommendations on data.get_urm_test_1() and optionally export them

        Parameters
        ----------
        distance : str, distance metric
        h : accepted but not used by this method
        k : int, K nearest neighbour to consider
        shrink : float, shrink term used in the normalization
        threshold : float, accepted but not forwarded to fit
            (NOTE(review): confirm whether fit should receive it)
        alpha, beta, l, c : extra parameters forwarded unchanged to fit
        export : bool, if True export the recommendations with exportcsv,
            named '<self.name>_<distance>'
        verbose : bool, forwarded to the model calls; if True also log the elapsed time

        Returns
        -------
        recs: (list) recommendations
        map10: (float) MAP10 for the provided recommendations, None when there
            are no recommendations to evaluate
        """
        started_at = time.time()

        fit_params = dict(k=k, distance=distance, shrink=shrink,
                          alpha=alpha, beta=beta, l=l, c=c, verbose=verbose)
        self.fit(**fit_params)
        recommendations = self.recommend_batch(N=10, filter_already_liked=True, verbose=verbose)

        if len(recommendations) == 0:
            log.warning('No recommendations available, skip evaluation')
            score = None
        else:
            # The test matrix is fetched only when there is something to evaluate.
            score = self.evaluate(recommendations, test_urm=data.get_urm_test_1(), verbose=verbose)

        if export:
            submission_name = '{}_{}'.format(self.name, distance)
            exportcsv(recommendations, path='submission', name=submission_name, verbose=verbose)

        if verbose:
            elapsed = time.time() - started_at
            log.info('Run in: {:.2f}s'.format(elapsed))

        return recommendations, score
Esempio n. 7
0
    def run(self,
            urm_train=None,
            urm=None,
            urm_test=None,
            targetids=None,
            factors=100,
            regularization=0.01,
            iterations=100,
            alpha=25,
            with_scores=False,
            export=True,
            verbose=True):
        """
        Fit the model, recommend for the target users, evaluate the
        recommendations and optionally export them to a file

        NOTE(review): fit() is called with hard-coded hyperparameters; the
        factors, regularization, iterations and alpha arguments (as well as
        urm_train and urm) are accepted for backward compatibility but are not
        used by this method.

        Parameters
        ----------
        urm_train : accepted but not used by this method
        urm : accepted but not used by this method
        urm_test : urm where to test the model. If None, use: data.get_urm_test_1()
        targetids : list, target user ids. If None, use: data.get_target_playlists()
        factors, regularization, iterations, alpha : accepted but not used
        with_scores : bool, forwarded to recommend_batch
        export : bool, if True export the recommendations with exportcsv under
            the 'submission' path, named after self.name
        verbose : bool, forwarded to the model calls; if True also log the elapsed time

        Returns
        -------
        :return: recs: (list) recommendations
        :return: map10: (float) MAP10 for the provided recommendations, None
            when there are no recommendations to evaluate
        """
        # Only the test matrix and the target ids are actually consumed below;
        # load their defaults lazily and skip the datasets nothing reads.
        if urm_test is None:
            urm_test = data.get_urm_test_1()
        if targetids is None:
            targetids = data.get_target_playlists()

        # Timing starts after the inputs are resolved, as it did originally.
        start = time.time()

        self.fit(l1_ratio=0.1,
                 positive_only=True,
                 alpha=1e-4,
                 fit_intercept=False,
                 copy_X=False,
                 precompute=False,
                 selection='random',
                 max_iter=100,
                 topK=100,
                 tol=1e-4,
                 workers=multiprocessing.cpu_count())
        recs = self.recommend_batch(userids=targetids,
                                    with_scores=with_scores,
                                    verbose=verbose)

        map10 = None
        if len(recs) > 0:
            map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
        else:
            log.warning('No recommendations available, skip evaluation')

        if export:
            exportcsv(recs, path='submission', name=self.name, verbose=verbose)

        if verbose:
            log.info('Run in: {:.2f}s'.format(time.time() - start))

        return recs, map10
Esempio n. 8
0
        sps.save_npz(path + model.RECOMMENDER_NAME, r_hat)
    elif arg == 's':
        model.fit(topK=500, alpha=1.7, min_rating=1, normalize_similarity=True)
        print('Saving the similarity matrix...')
        sps.save_npz(
            'raw_data/saved_sim_matrix_evaluation_1/{}'.format(
                model.RECOMMENDER_NAME), model.W_sparse)
    elif arg == 'v':
        model = P3alphaRecommender(data.get_urm_train_1())
        model.validate(iterations=15,
                       urm_test=data.get_urm_test_1(),
                       targetids=data.get_target_playlists(),
                       normalize_similarity=True,
                       k=(50, 900),
                       min_rating=(0, 2),
                       alpha=(0, 2),
                       verbose=False)
    elif arg == 'e':
        model = P3alphaRecommender(data.get_urm())
        model.fit(topK=900,
                  alpha=1.2,
                  min_rating=0,
                  implicit=True,
                  normalize_similarity=False)
        recs = model.recommend_batch(data.get_target_playlists())
        export.exportcsv(recs, name=model.RECOMMENDER_NAME)
    elif arg == 'x':
        pass
    else:
        log.error('Wrong option!')
Esempio n. 9
0
def wizard_hybrid():
    """
    Interactive wizard that builds a hybrid recommender from saved matrices.

    The matrices, the folder they come from and the model names are obtained
    from ``hb.create_matrices_array()``. The folder decides the workflow:

    * a "save" folder ('saved_sim_matrix', 'saved_r_hat'): ask for the weights
      and either save the resulting estimated matrix or (R_HAT only, on
      request) export a CSV of recommendations;
    * an "evaluation" folder ('saved_sim_matrix_evaluation',
      'saved_r_hat_evaluation'): ask whether to run the validation search
      (option '1') or to use hand-crafted weights (option '2'), then save,
      evaluate or export according to the user's choice;
    * anything else is reported with ``log.error('WRONG FOLDER')``.

    All choices are read from standard input. An empty answer to a
    single-character prompt is treated as "no valid option" instead of
    raising IndexError.
    """
    SIM_MATRIX = ['saved_sim_matrix', 'saved_sim_matrix_evaluation']
    R_HAT = ['saved_r_hat', 'saved_r_hat_evaluation']
    SAVE = ['saved_sim_matrix', 'saved_r_hat']
    EVALUATE = ['saved_sim_matrix_evaluation', 'saved_r_hat_evaluation']

    start = time.time()

    matrices_array, folder, models = hb.create_matrices_array()

    print('matrices loaded in {:.2f} s'.format(time.time() - start))
    log.success('You have loaded: {}'.format(models))

    NORMALIZATION_MODE = normalization_mode_selection()

    def build_hybrid(urm_filter_tracks):
        """
        Return ``(kind, recommender)`` for the loaded folder: ('SIM',
        HybridSimilarity) for similarity folders, ('R_HAT', HybridRHat)
        otherwise. Only called when ``folder`` is in SAVE or EVALUATE, whose
        entries are all covered by SIM_MATRIX and R_HAT.
        """
        if folder in SIM_MATRIX:
            return 'SIM', HybridSimilarity(
                matrices_array,
                normalization_mode=NORMALIZATION_MODE,
                urm_filter_tracks=urm_filter_tracks)
        return 'R_HAT', HybridRHat(matrices_array,
                                   normalization_mode=NORMALIZATION_MODE,
                                   urm_filter_tracks=urm_filter_tracks)

    if folder in SAVE:
        WEIGHTS = weights_selection(models)

        if folder in SIM_MATRIX:
            name, urm_filter_tracks, rel_path = option_selection_save('SIM')
            _, hybrid_rec = build_hybrid(urm_filter_tracks)
            sps.save_npz('raw_data/' + rel_path + name,
                         hybrid_rec.get_r_hat(weights_array=WEIGHTS))
        elif folder in R_HAT:
            # For R_HAT the selection also returns whether to export a CSV.
            name, urm_filter_tracks, rel_path, EXPORT = option_selection_save(
                'R_HAT')
            _, hybrid_rec = build_hybrid(urm_filter_tracks)
            if EXPORT:
                N = ask_number_recommendations()
                recommendations = hybrid_rec.recommend_batch(
                    weights_array=WEIGHTS,
                    target_userids=data.get_target_playlists(),
                    N=N)
                exportcsv(recommendations, path='submission', name=name)
            else:
                sps.save_npz('raw_data/' + rel_path + name,
                             hybrid_rec.get_r_hat(weights_array=WEIGHTS))

    elif folder in EVALUATE:
        log.success('|WHAT YOU WANT TO DO ???|')
        log.warning('\'1\' BAYESIAN SEARCH VALIDATION')
        log.warning('\'2\' HAND CRAFTED WEIGHTS')
        # Slice instead of index so an empty line does not raise IndexError.
        mode = input()[:1]

        # BAYESIAN SEARCH
        if mode == '1':
            log.success(
                '|SELECT A NUMBER OF |||ITERATIONS||| FOR THE ALGORITHM|')
            # NOTE(review): parsed as float; confirm validate() accepts a
            # non-integer iteration count.
            iterations = float(input())
            _, hybrid_rec = build_hybrid(data.get_urm_train_1())
            hybrid_rec.validate(iterations=iterations,
                                urm_test=data.get_urm_test_1(),
                                userids=data.get_target_playlists())

        # MANUAL WEIGHTS
        elif mode == '2':
            WEIGHTS = weights_selection(models)
            urm_filter_tracks = data.get_urm_train_1()
            chose = option_selection_evaluation_2()  # save, evaluate or csv
            if chose == 's':
                log.success('|CHOSE A NAME FOR THE MATRIX...|')
                name = input()
                matrix_kind, hybrid_rec = build_hybrid(urm_filter_tracks)

                # NOTE(review): the output folders are the r_hat evaluation
                # ones even when the loaded matrices are similarities --
                # confirm this is intended.
                sps.save_npz('raw_data/saved_r_hat_evaluation/' + name,
                             hybrid_rec.get_r_hat(weights_array=WEIGHTS))
                # Same hybrid built for the second train split.
                sym_rec = symmetric_recommender_creator(
                    models,
                    matrix_kind,
                    NORMALIZATION_MODE,
                    urm_filter_tracks=data.get_urm_train_2())
                sps.save_npz('raw_data/saved_r_hat_evaluation_2/' + name,
                             sym_rec.get_r_hat(weights_array=WEIGHTS))

            elif chose == 'e':
                matrix_kind, hybrid_rec = build_hybrid(urm_filter_tracks)
                N = ask_number_recommendations()
                print('Recommending...')
                recs = hybrid_rec.recommend_batch(
                    weights_array=WEIGHTS,
                    target_userids=data.get_target_playlists(),
                    N=N)
                hybrid_rec.evaluate(recommendations=recs,
                                    test_urm=data.get_urm_test_1())

                # export the recommendations
                log.success(
                    'Do you want to save the CSV with these recomendations? (y/n)'
                )
                if input()[:1] == 'y':
                    export_csv_wizard(recs)

                # Repeat the evaluation on the second train/test split.
                sym_rec = symmetric_recommender_creator(
                    models,
                    matrix_kind,
                    NORMALIZATION_MODE,
                    urm_filter_tracks=data.get_urm_train_2())
                recs2 = sym_rec.recommend_batch(
                    weights_array=WEIGHTS,
                    target_userids=data.get_target_playlists())
                sym_rec.evaluate(recommendations=recs2,
                                 test_urm=data.get_urm_test_2())

            elif chose == 'c':
                # CSV export is available for R_HAT folders only.
                if folder in R_HAT:
                    _, hybrid_rec = build_hybrid(urm_filter_tracks)
                    N = ask_number_recommendations()
                    print('Recommending...')
                    recs = hybrid_rec.recommend_batch(
                        weights_array=WEIGHTS,
                        target_userids=data.get_target_playlists(),
                        N=N)

                    export_csv_wizard(recs)
                else:
                    log.error('not implemented yet')
    else:
        log.error('WRONG FOLDER')
Esempio n. 10
0
def export_csv_wizard(recommendations):
    """
    Ask on standard input for a file name and export ``recommendations``
    through ``exportcsv`` under that name, logging a confirmation afterwards.
    """
    log.info('Choose a name for the CSV:')
    csv_name = input()
    exportcsv(recommendations, name=csv_name)
    log.success('CSV saved!')