def __init__(self, ml_type='classifier', metric=None, ml_p=None):
    """Set up the recommender state and its underlying SVD model.

    ``ml_type``, ``metric`` and ``ml_p`` are handed unchanged to the parent
    initializer; this method then creates the result store, the reader and
    the model, and initialises the fitting bookkeeping.
    """
    super().__init__(ml_type, metric, ml_p)

    # Storage for results; starts out as an empty frame.
    self.results_df = pd.DataFrame()

    # Reader used to translate between PennAI results and the Surprise
    # training set.
    self.reader = Reader()

    # Settings for the online, Surprise-based recommender model.
    # NOTE(review): the lr_* / reg_* entries are left as None; in Surprise's
    # SVD that means "fall back to lr_all / reg_all" -- confirm that mySVD
    # keeps this convention.
    svd_settings = dict(
        n_factors=20,
        n_epochs=20,
        biased=True,
        init_mean=0,
        init_std_dev=.2,
        lr_all=.01,
        reg_all=.02,
        lr_bu=None,
        lr_bi=None,
        lr_pu=None,
        lr_qi=None,
        reg_bu=None,
        reg_bi=None,
        reg_pu=None,
        reg_qi=None,
        random_state=None,
        verbose=False,
    )
    self.algo = mySVD(**svd_settings)

    # Fitting bookkeeping: nothing has been fitted yet, and max_epochs is
    # the upper bound on training epochs stored for later use.
    self.first_fit = True
    self.max_epochs = 100
def set_algo(self, surprise_kwargs=None):
    """Replace ``self.algo`` with a freshly constructed ``mySVD``.

    Parameters
    ----------
    surprise_kwargs : dict, optional
        Keyword arguments for ``mySVD``. Entries given here take precedence
        over the defaults listed in the body. ``None`` (the default) or an
        empty mapping leaves the defaults untouched.
    """
    # Baseline configuration; any caller-supplied entry overrides it.
    alg_kwargs = {
        'n_factors': 20,
        'biased': True,
        'init_mean': 0,
        'init_std_dev': .2,
        'lr_all': .01,
        'reg_all': .02,
        'verbose': False,
    }
    # A None sentinel replaces the former mutable ``{}`` default, which was
    # a single dict object shared by every call of this method.
    if surprise_kwargs is not None:
        alg_kwargs.update(surprise_kwargs)
    self.algo = mySVD(**alg_kwargs)
class SVDRecommender(SurpriseRecommender):
    """Recommender built on an SVD matrix factorization.

    Background on the algorithm:
    https://surprise.readthedocs.io/en/stable/matrix_factorization.html

    Machine learning algorithms and their parameters (ML+P) are recommended
    using the SVD algorithm:

    - ML+P results are stored for every dataset.
    - A matrix factorization is learned on the non-missing data.
    - For a given dataset, the rankings of all ML+P are estimated and the
      top n_recs are returned.

    The SVD in use is a custom online variant, available at
    https://github.com/lacava/surprise
    """

    # Class-level default model. n_epochs is not passed here; update_model
    # assigns it before every fit.
    algo = mySVD(
        n_factors=20,
        biased=True,
        init_mean=0,
        init_std_dev=.2,
        lr_all=.01,
        reg_all=.02,
        lr_bu=None,
        lr_bi=None,
        lr_pu=None,
        lr_qi=None,
        reg_bu=None,
        reg_bi=None,
        reg_pu=None,
        reg_qi=None,
        random_state=None,
        verbose=False,
    )

    def update_model(self, results_data):
        """Store new results and refit the SVD model on them.

        Parameters
        ----------
        results_data
            The new results. Must support ``sample(frac=1)`` and ``len()``
            (presumably a pandas DataFrame -- confirm against callers).

        Notes
        -----
        While ``self.first_fit`` is set, the data is shuffled before use and
        the shuffled copy is kept as ``self.init_results_data``; the flag is
        then cleared.
        """
        logger.info('updating SVD model')

        # Only the very first batch is shuffled.
        if self.first_fit:
            results_data = results_data.sample(frac=1)
        self.update_training_data(results_data)

        logger.debug('fitting self.algo...')
        # The epoch count follows the amount of new data, capped at
        # max_epochs and then raised to at least min_epochs.
        epoch_budget = min(len(results_data), self.max_epochs)
        self.algo.n_epochs = max(epoch_budget, self.min_epochs)
        # presumably self.trainset was refreshed by update_training_data
        # above -- verify in the parent class.
        self.algo.partial_fit(self.trainset)
        logger.debug('done.')

        # first_fit is deliberately read again here rather than cached, so
        # any change made to it during the calls above is honoured.
        if self.first_fit:
            self.init_results_data = results_data
            self.first_fit = False

        logger.debug('model SVD updated')