Exemple #1
0
    def __init__(self, ml_type='classifier', metric=None, ml_p=None):
        """Set up result storage, the Surprise reader and the SVD model.

        Parameters
        ----------
        ml_type, metric, ml_p
            Forwarded unchanged, in this order, to the parent initializer.
        """
        super().__init__(ml_type, metric, ml_p)

        # Hyperparameters of the online Surprise-based rec system. The
        # per-term learning rates / regularizers are explicitly None
        # (presumably deferring to lr_all / reg_all -- confirm in mySVD).
        svd_settings = {
            'n_factors': 20,
            'n_epochs': 20,
            'biased': True,
            'init_mean': 0,
            'init_std_dev': .2,
            'lr_all': .01,
            'reg_all': .02,
            'lr_bu': None,
            'lr_bi': None,
            'lr_pu': None,
            'lr_qi': None,
            'reg_bu': None,
            'reg_bi': None,
            'reg_pu': None,
            'reg_qi': None,
            'random_state': None,
            'verbose': False,
        }

        # Accumulated results start out as an empty frame.
        self.results_df = pd.DataFrame()
        # Reader translates between PennAI results and the Surprise
        # training set.
        self.reader = Reader()
        self.algo = mySVD(**svd_settings)

        # No fit has happened yet; max_epochs is the upper epoch bound.
        self.first_fit, self.max_epochs = True, 100
 def set_algo(self, surprise_kwargs=None):
     """Build a fresh SVD model and store it on ``self.algo``.

     Parameters
     ----------
     surprise_kwargs : dict, optional
         Keyword arguments forwarded to ``mySVD``. Entries given here
         override the defaults listed below. ``None`` (the default) or
         an empty dict keeps the defaults as they are.
     """
     # Baseline hyperparameters; anything the caller passes wins.
     alg_kwargs = {
         'n_factors': 20,
         'biased': True,
         'init_mean': 0,
         'init_std_dev': .2,
         'lr_all': .01,
         'reg_all': .02,
         'verbose': False
     }
     # A None sentinel replaces the former mutable ``{}`` default, so no
     # dict object is shared between calls and passing None explicitly
     # no longer fails inside ``dict.update``.
     if surprise_kwargs is not None:
         alg_kwargs.update(surprise_kwargs)
     self.algo = mySVD(**alg_kwargs)
class SVDRecommender(SurpriseRecommender):
    """Recommender built on an SVD matrix factorization.

    See https://surprise.readthedocs.io/en/stable/matrix_factorization.html

    Recommends machine learning algorithms and parameters (ML+P) using
    the SVD algorithm:
        - stores ML+P and every dataset.
        - learns a matrix factorization on the non-missing data.
        - given a dataset, estimates the rankings of all ML+P and
          returns the top n_recs.

    The model is a custom online version of SVD, found here:
    https://github.com/lacava/surprise
    """

    # Class-level model instance; it is what ``self.algo`` resolves to
    # unless an instance attribute of the same name shadows it.
    algo = mySVD(
        n_factors=20, biased=True,
        init_mean=0, init_std_dev=.2,
        lr_all=.01, reg_all=.02,
        lr_bu=None, lr_bi=None, lr_pu=None, lr_qi=None,
        reg_bu=None, reg_bi=None, reg_pu=None, reg_qi=None,
        random_state=None, verbose=False,
    )

    def update_model(self, results_data):
        """Record new results and run an incremental SVD fit.

        Parameters
        ----------
        results_data
            The new results. Must support ``len()`` and, on the first
            call, ``sample(frac=1)`` (e.g. a pandas DataFrame).
        """
        logger.info('updating SVD model')

        # Only the very first batch is shuffled before being stored.
        if self.first_fit:
            results_data = results_data.sample(frac=1)
        self.update_training_data(results_data)

        logger.debug('fitting self.algo...')
        # Training iterations scale with the number of new results,
        # capped at max_epochs and then raised to at least min_epochs.
        # NOTE(review): min_epochs is not set anywhere in the visible
        # code -- presumably the parent class defines it; confirm.
        model = self.algo
        model.n_epochs = min(len(results_data), self.max_epochs)
        model.n_epochs = max(model.n_epochs, self.min_epochs)
        model.partial_fit(self.trainset)
        logger.debug('done.')

        if self.first_fit:
            # Keep the (shuffled) first batch and leave first-fit mode.
            self.init_results_data = results_data
            self.first_fit = False
        logger.debug('model SVD updated')