def run_lda(self):
    """
    Train an LDA recommender and print a one-line summary of the result.

    The recommender is built from this object's initializer, evaluator,
    hyperparameters and options, together with its ``verbose``,
    ``load_matrices`` and ``dump`` attributes. Whatever ``train()`` returns
    is unpacked positionally into the eleven placeholders of the summary
    template, so it must provide at least eleven numeric values.
    """
    summary_template = (
        'Summary: Test sum {:.2f}, Train sum {:.2f}, Final error {:.5f}, train recall {:.5f}, '
        'test recall {:.5f}, recall@200 {:.5f}, '
        'ratio {:.5f}, mrr@5 {:.5f}, '
        'ndcg@5 {:.5f}, mrr@10 {:.5f}, ndcg@10 {:.5f}'
    )
    recommender = LDARecommender(self.initializer, self.evaluator,
                                 self.hyperparameters, self.options,
                                 self.verbose, self.load_matrices, self.dump)
    metrics = recommender.train()
    print(summary_template.format(*metrics))
def runTest(self):
    """
    Check the dimensions, type and value ranges of an LDA recommender.

    Before training, the recommender must expose the expected number of
    factors and items. After training, both the document-topic
    distribution and the prediction matrix must have the expected shape
    and lie within [0, 1] up to a tolerance of 1e-6.
    """
    upper_bound = 1.0 + 1e-6
    lower_bound = -1e-6

    recommender = LDARecommender(self.initializer, self.evaluator,
                                 self.hyperparameters, self.options)
    self.assertEqual(recommender.n_factors, self.n_factors)
    self.assertEqual(recommender.n_items, self.documents)

    recommender.train()

    # The getters are called afresh for every assertion, as before.
    topic_distribution = recommender.get_document_topic_distribution
    self.assertEqual(topic_distribution().shape,
                     (self.documents, self.n_factors))
    self.assertLessEqual(topic_distribution().max(), upper_bound)
    self.assertGreaterEqual(topic_distribution().min(), lower_bound)

    self.assertTrue(isinstance(recommender, AbstractRecommender))

    predictions = recommender.get_predictions
    self.assertEqual(predictions().shape, (self.users, self.documents))
    self.assertLessEqual(predictions().max(), upper_bound)
    self.assertGreaterEqual(predictions().min(), lower_bound)
def run_lda(self):
    """
    Train an LDA recommender and request its evaluation report.

    The recommender is built from this object's initializer, evaluator,
    hyperparameters and options, together with its ``verbose``,
    ``load_matrices`` and ``dump`` attributes. The value returned by
    ``get_evaluation_report()`` is not used here.
    """
    constructor_args = (self.initializer, self.evaluator,
                        self.hyperparameters, self.options,
                        self.verbose, self.load_matrices, self.dump)
    recommender = LDARecommender(*constructor_args)
    recommender.train()
    recommender.get_evaluation_report()
def __init__(self, initializer=None, abstracts_preprocessor=None, ratings=None,
             config=None, process_parser=False, verbose=False,
             load_matrices=True, dump_matrices=True, train_more=True,
             random_seed=False, results_file_name='top_recommendations'):
    """
    Constructor of the RecommenderSystem.

    :param ModelInitializer initializer: Currently ignored; a fresh
        ModelInitializer is always built from the configured
        hyperparameters.
    :param AbstractsPreprocessor abstracts_preprocessor: A preprocessor of
        abstracts; if None, one is built from the DataParser.
    :param int[][] ratings: Ratings matrix; if None, the matrix is
        obtained from the DataParser.
    :param config: Value handed to RecommenderConfiguration.
    :param boolean process_parser: A flag deciding whether to run
        DataParser.process() first.
    :param boolean verbose: A flag deciding whether to print progress.
    :param boolean load_matrices: A flag forwarded to every recommender
        (presumably to load previously saved matrices -- confirm).
    :param boolean dump_matrices: A flag for saving output matrices.
    :param boolean train_more: Forwarded to the ALS collaborative
        filtering; train more after loading matrices.
    :param boolean random_seed: A flag forwarded to the Evaluator to
        determine if a random seed is used or not.
    :param str results_file_name: Top recommendations results' file name
        (without extension; '.dat' is appended).
    :raises NameError: If the configured error metric, content-based
        method, collaborative filtering method or recommender type is
        unknown, or if they are combined in an unsupported way.
    """
    if process_parser:
        DataParser.process()

    if ratings is None:
        self.ratings = numpy.array(DataParser.get_ratings_matrix())
    else:
        self.ratings = ratings

    if abstracts_preprocessor is None:
        self.abstracts_preprocessor = AbstractsPreprocessor(
            DataParser.get_abstracts(), *DataParser.get_word_distribution())
    else:
        self.abstracts_preprocessor = abstracts_preprocessor

    # Get configurations
    self.config = RecommenderConfiguration(config)

    # Set flags
    self.results_file_name = results_file_name + '.dat'
    self._verbose = verbose
    self._dump_matrices = dump_matrices
    self._load_matrices = load_matrices
    self._train_more = train_more
    self._split_type = 'user'
    self._random_seed = random_seed

    self.set_hyperparameters(self.config.get_hyperparameters())
    self.set_options(self.config.get_options())

    self.initializer = ModelInitializer(self.hyperparameters.copy(),
                                        self.n_iter, self._verbose)

    error_metric = self.config.get_error_metric()
    if error_metric != 'RMS':
        raise NameError(
            "Not a valid error metric %s. Only option is 'RMS'"
            % error_metric)
    self.evaluator = Evaluator(self.ratings, self.abstracts_preprocessor,
                               self._random_seed, self._verbose)

    # Read the remaining configuration once; the values are reused by all
    # the validation steps below.
    content_based_name = self.config.get_content_based()
    collaborative_name = self.config.get_collaborative_filtering()
    recommender_name = self.config.get_recommender()

    # Positional arguments shared by every recommender constructor.
    shared_args = (self.initializer, self.evaluator, self.hyperparameters,
                   self.options, self._verbose, self._load_matrices,
                   self._dump_matrices)

    # Initialize content based.
    if content_based_name == 'None':
        self.content_based = ContentBased(*shared_args)
    elif content_based_name == 'LDA':
        self.content_based = LDARecommender(*shared_args)
    elif content_based_name == 'LDA2Vec':
        self.content_based = LDA2VecRecommender(*shared_args)
    else:
        raise NameError(
            "Not a valid content based %s. Options are 'None', "
            "'LDA', 'LDA2Vec'" % content_based_name)

    # Initialize collaborative filtering.
    if collaborative_name == 'ALS':
        is_hybrid = recommender_name == 'hybrid'
        # NOTE(review): the message mentions the hybrid recommender, but
        # the check rejects content based 'None' for every ALS setup,
        # hybrid or not -- confirm this is intended.
        if content_based_name == 'None':
            raise NameError(
                "Not valid content based 'None' with hybrid recommender")
        self.collaborative_filtering = CollaborativeFiltering(
            *(shared_args + (self._train_more, is_hybrid)))
    elif collaborative_name == 'SDAE':
        # Validate before building the model, so that an invalid
        # configuration fails fast instead of constructing a recommender
        # that is immediately thrown away.
        if content_based_name != 'None':
            raise NameError(
                "Not a valid content based %s with SDAE. You can only use 'None'"
                % content_based_name)
        self.collaborative_filtering = SDAERecommender(*shared_args)
    elif collaborative_name == 'None':
        if recommender_name != 'itembased':
            raise NameError(
                "None collaborative filtering is only valid with itembased recommender type"
            )
        if content_based_name == 'None':
            raise NameError(
                "Not valid content based 'None' with item-based recommender"
            )
        self.collaborative_filtering = None
    else:
        raise NameError("Not a valid collaborative filtering %s. "
                        "Only options are 'None', 'ALS', 'SDAE'"
                        % collaborative_name)

    # Initialize recommender
    if recommender_name == 'itembased':
        self.recommender = self.content_based
    elif recommender_name == 'userbased':
        self.recommender = self.collaborative_filtering
    elif recommender_name == 'hybrid':
        # The hybrid recommender is this object itself.
        self.recommender = self
    else:
        raise NameError(
            "Invalid recommender type %s. "
            "Only options are 'userbased','itembased', and 'hybrid'"
            % recommender_name)
class RecommenderSystem(AbstractRecommender):
    """
    A class that will combine the content-based and collaborative-filtering,
    in order to provide the main functionalities of recommendations.

    Depending on the configured recommender type, ``self.recommender`` is
    the content-based model ('itembased'), the collaborative filtering
    model ('userbased') or this object itself ('hybrid').
    """

    def __init__(self, initializer=None, abstracts_preprocessor=None,
                 ratings=None, config=None, process_parser=False,
                 verbose=False, load_matrices=True, dump_matrices=True,
                 train_more=True, random_seed=False,
                 results_file_name='top_recommendations'):
        """
        Constructor of the RecommenderSystem.

        :param ModelInitializer initializer: Currently ignored; a fresh
            ModelInitializer is always built from the configured
            hyperparameters.
        :param AbstractsPreprocessor abstracts_preprocessor: A preprocessor
            of abstracts; if None, one is built from the DataParser.
        :param int[][] ratings: Ratings matrix; if None, the matrix is
            obtained from the DataParser.
        :param config: Value handed to RecommenderConfiguration.
        :param boolean process_parser: A flag deciding whether to run
            DataParser.process() first.
        :param boolean verbose: A flag deciding whether to print progress.
        :param boolean load_matrices: A flag forwarded to every recommender
            (presumably to load previously saved matrices -- confirm).
        :param boolean dump_matrices: A flag for saving output matrices.
        :param boolean train_more: Forwarded to the ALS collaborative
            filtering; train more after loading matrices.
        :param boolean random_seed: A flag forwarded to the Evaluator to
            determine if a random seed is used or not.
        :param str results_file_name: Top recommendations results' file
            name (without extension; '.dat' is appended).
        :raises NameError: If the configured error metric, content-based
            method, collaborative filtering method or recommender type is
            unknown, or if they are combined in an unsupported way.
        """
        if process_parser:
            DataParser.process()

        if ratings is None:
            self.ratings = numpy.array(DataParser.get_ratings_matrix())
        else:
            self.ratings = ratings

        if abstracts_preprocessor is None:
            self.abstracts_preprocessor = AbstractsPreprocessor(
                DataParser.get_abstracts(),
                *DataParser.get_word_distribution())
        else:
            self.abstracts_preprocessor = abstracts_preprocessor

        # Get configurations
        self.config = RecommenderConfiguration(config)

        # Set flags
        self.results_file_name = results_file_name + '.dat'
        self._verbose = verbose
        self._dump_matrices = dump_matrices
        self._load_matrices = load_matrices
        self._train_more = train_more
        self._split_type = 'user'
        self._random_seed = random_seed

        self.set_hyperparameters(self.config.get_hyperparameters())
        self.set_options(self.config.get_options())

        self.initializer = ModelInitializer(self.hyperparameters.copy(),
                                            self.n_iter, self._verbose)

        error_metric = self.config.get_error_metric()
        if error_metric != 'RMS':
            raise NameError(
                "Not a valid error metric %s. Only option is 'RMS'"
                % error_metric)
        self.evaluator = Evaluator(self.ratings, self.abstracts_preprocessor,
                                   self._random_seed, self._verbose)

        # Read the remaining configuration once; the values are reused by
        # all the validation steps below.
        content_based_name = self.config.get_content_based()
        collaborative_name = self.config.get_collaborative_filtering()
        recommender_name = self.config.get_recommender()

        # Positional arguments shared by every recommender constructor.
        shared_args = (self.initializer, self.evaluator,
                       self.hyperparameters, self.options, self._verbose,
                       self._load_matrices, self._dump_matrices)

        # Initialize content based.
        if content_based_name == 'None':
            self.content_based = ContentBased(*shared_args)
        elif content_based_name == 'LDA':
            self.content_based = LDARecommender(*shared_args)
        elif content_based_name == 'LDA2Vec':
            self.content_based = LDA2VecRecommender(*shared_args)
        else:
            raise NameError(
                "Not a valid content based %s. Options are 'None', "
                "'LDA', 'LDA2Vec'" % content_based_name)

        # Initialize collaborative filtering.
        if collaborative_name == 'ALS':
            is_hybrid = recommender_name == 'hybrid'
            # NOTE(review): the message mentions the hybrid recommender,
            # but the check rejects content based 'None' for every ALS
            # setup, hybrid or not -- confirm this is intended.
            if content_based_name == 'None':
                raise NameError(
                    "Not valid content based 'None' with hybrid recommender")
            self.collaborative_filtering = CollaborativeFiltering(
                *(shared_args + (self._train_more, is_hybrid)))
        elif collaborative_name == 'SDAE':
            # Validate before building the model, so that an invalid
            # configuration fails fast instead of constructing a
            # recommender that is immediately thrown away.
            if content_based_name != 'None':
                raise NameError(
                    "Not a valid content based %s with SDAE. You can only use 'None'"
                    % content_based_name)
            self.collaborative_filtering = SDAERecommender(*shared_args)
        elif collaborative_name == 'None':
            if recommender_name != 'itembased':
                raise NameError(
                    "None collaborative filtering is only valid with itembased recommender type"
                )
            if content_based_name == 'None':
                raise NameError(
                    "Not valid content based 'None' with item-based recommender"
                )
            self.collaborative_filtering = None
        else:
            raise NameError("Not a valid collaborative filtering %s. "
                            "Only options are 'None', 'ALS', 'SDAE'"
                            % collaborative_name)

        # Initialize recommender
        if recommender_name == 'itembased':
            self.recommender = self.content_based
        elif recommender_name == 'userbased':
            self.recommender = self.collaborative_filtering
        elif recommender_name == 'hybrid':
            # The hybrid recommender is this object itself.
            self.recommender = self
        else:
            raise NameError(
                "Invalid recommender type %s. "
                "Only options are 'userbased','itembased', and 'hybrid'"
                % recommender_name)

    @overrides
    def set_options(self, options):
        """
        Set the options of the recommender. Namely n_iterations and k_folds.

        Only 'n_iterations' is read here (into ``self.n_iter``); a copy of
        the whole dictionary is kept in ``self.options``.

        :param dict options: A dictionary of the options.
        """
        self.n_iter = options['n_iterations']
        self.options = options.copy()

    @overrides
    def get_evaluation_report(self):
        """
        Return the evaluation report of the active recommender.

        For the 'hybrid' type the report comes from the collaborative
        filtering model, otherwise from ``self.recommender``.
        """
        if self.config.get_recommender() == 'hybrid':
            reporter = self.collaborative_filtering
        else:
            reporter = self.recommender
        return reporter.get_evaluation_report()

    @overrides
    def set_hyperparameters(self, hyperparameters):
        """
        Setter of the hyperparameters of the recommender.

        Any cached predictions are discarded, and the hyperparameters are
        forwarded to the collaborative filtering and content-based models
        when they already exist.

        :param dict hyperparameters: hyperparameters of the recommender,
            contains _lambda and n_factors
        """
        self.n_factors = hyperparameters['n_factors']
        self._lambda = hyperparameters['_lambda']
        self.predictions = None
        self.hyperparameters = hyperparameters.copy()
        # The constructor calls this before either model is created, so a
        # missing attribute is treated the same as a None one.
        for attribute in ('collaborative_filtering', 'content_based'):
            model = getattr(self, attribute, None)
            if model is not None:
                model.set_hyperparameters(hyperparameters)

    @overrides
    def train(self):
        """
        Train the recommender on the given data.

        The content-based model is trained first, unless it is the plain
        ContentBased class. Then, depending on the recommender type, the
        collaborative filtering is trained on top of it.

        :returns: For the user-based and hybrid recommenders, whatever the
            collaborative filtering's train() returns. For the item-based
            recommender, the result of the content-based train(), or
            numpy.inf when the content-based model was not trained.
        :rtype: float
        """
        assert (self.recommender == self.collaborative_filtering or
                self.recommender == self.content_based or
                self.recommender == self)

        item_based = self.content_based
        if self._verbose:
            print("Training content-based %s..." % item_based)

        content_based_error = numpy.inf
        # Exact class comparison: only the plain ContentBased is skipped.
        # NOTE(review): presumably the LDA recommenders subclass
        # ContentBased, in which case isinstance() would not be
        # equivalent -- confirm before changing.
        if item_based.__class__ != ContentBased:
            content_based_error = item_based.train()
            item_based.get_predictions()
            # Optimize unused memory
            if not self.recommender == item_based:
                del item_based.train_data
                del item_based.test_data
                if hasattr(item_based, 'fold_test_indices'):
                    del item_based.fold_test_indices

        if self.recommender == self.collaborative_filtering:
            topic_distribution = item_based.get_document_topic_distribution()
            if self._verbose:
                print("Training collaborative-filtering %s..."
                      % self.collaborative_filtering)
            if topic_distribution is None:
                return self.collaborative_filtering.train()
            # Train on a copy of the content-based topic distribution.
            return self.collaborative_filtering.train(
                topic_distribution.copy())

        if self.recommender == self:
            if self._verbose:
                print("Training collaborative_filtering %s..."
                      % self.collaborative_filtering)
            self.collaborative_filtering.set_item_based_recommender(
                item_based)
            return self.collaborative_filtering.train()

        # Item-based recommender: cache its predictions.
        self.predictions = self.recommender.get_predictions()
        return content_based_error

    @overrides
    def get_predictions(self):
        """
        Predict ratings for every user and item.

        For the hybrid recommender the collaborative filtering's
        predictions are returned directly and not cached here; otherwise
        the active recommender's predictions are cached on first use.

        :returns: A (user, document) matrix of predictions
        :rtype: ndarray
        """
        if self.predictions is not None:
            return self.predictions
        if self.recommender == self:
            return self.collaborative_filtering.get_predictions()
        self.predictions = self.recommender.get_predictions()
        return self.predictions
def __init__(self, initializer=None, abstracts_preprocessor=None, ratings=None,
             config=None, process_parser=False, verbose=False,
             load_matrices=True, dump_matrices=True, train_more=True):
    """
    Constructor of the RecommenderSystem.

    :param ModelInitializer initializer: Currently ignored; a fresh
        ModelInitializer is always built from the configured
        hyperparameters.
    :param AbstractsPreprocessor abstracts_preprocessor: A preprocessor of
        abstracts; if None, one is built from the DataParser.
    :param int[][] ratings: Ratings matrix; if None, the matrix is
        obtained from the DataParser.
    :param config: Value handed to RecommenderConfiguration.
    :param boolean process_parser: A flag deciding whether to run
        DataParser.process() first.
    :param boolean verbose: A flag deciding whether to print progress.
    :param boolean load_matrices: A flag forwarded to every recommender
        (presumably to load previously saved matrices -- confirm).
    :param boolean dump_matrices: A flag for saving output matrices.
    :param boolean train_more: Forwarded to the collaborative filtering;
        train more after loading matrices.
    :raises NameError: If the configured error metric, content-based
        method, collaborative filtering method or recommender type is not
        one of the supported options.
    """
    if process_parser:
        DataParser.process()

    self.ratings = (numpy.array(DataParser.get_ratings_matrix())
                    if ratings is None else ratings)
    self.abstracts_preprocessor = (
        AbstractsPreprocessor(DataParser.get_abstracts(),
                              *DataParser.get_word_distribution())
        if abstracts_preprocessor is None else abstracts_preprocessor)

    # Get configurations
    self.config = RecommenderConfiguration(config)
    self.set_hyperparameters(self.config.get_hyperparameters())
    self.set_options(self.config.get_options())

    # Set flags
    self._verbose = verbose
    self._dump_matrices = dump_matrices
    self._load_matrices = load_matrices
    self._train_more = train_more

    self.initializer = ModelInitializer(self.hyperparameters.copy(),
                                        self.n_iter, self._verbose)

    error_metric = self.config.get_error_metric()
    if error_metric != 'RMS':
        raise NameError("Not a valid error metric %s. Only option is 'RMS'"
                        % error_metric)
    self.evaluator = Evaluator(self.ratings, self.abstracts_preprocessor)

    # Positional arguments shared by every recommender constructor.
    shared_args = (self.initializer, self.evaluator, self.hyperparameters,
                   self.options, self._verbose, self._load_matrices,
                   self._dump_matrices)

    # Initialize content based.
    content_based_name = self.config.get_content_based()
    if content_based_name == 'None':
        self.content_based = ContentBased(*shared_args)
    elif content_based_name == 'LDA':
        self.content_based = LDARecommender(*shared_args)
    elif content_based_name == 'LDA2Vec':
        self.content_based = LDA2VecRecommender(*shared_args)
    else:
        raise NameError("Not a valid content based %s. Options are 'None', "
                        "'LDA', 'LDA2Vec'" % content_based_name)

    # Initialize collaborative filtering.
    collaborative_name = self.config.get_collaborative_filtering()
    if collaborative_name != 'ALS':
        raise NameError("Not a valid collaborative filtering %s. "
                        "Only option is 'ALS'" % collaborative_name)
    self.collaborative_filtering = CollaborativeFiltering(
        *(shared_args + (self._train_more,)))

    # Initialize recommender
    recommender_name = self.config.get_recommender()
    if recommender_name == 'itembased':
        self.recommender = self.content_based
    elif recommender_name == 'userbased':
        self.recommender = self.collaborative_filtering
    else:
        raise NameError("Invalid recommender type %s. "
                        "Only options are 'userbased' and 'itembased'"
                        % recommender_name)
class RecommenderSystem(AbstractRecommender):
    """
    A class that will combine the content-based and collaborative-filtering,
    in order to provide the main functionalities of recommendations.

    Depending on the configured recommender type, ``self.recommender`` is
    either the content-based model ('itembased') or the collaborative
    filtering model ('userbased').
    """

    def __init__(self, initializer=None, abstracts_preprocessor=None,
                 ratings=None, config=None, process_parser=False,
                 verbose=False, load_matrices=True, dump_matrices=True,
                 train_more=True):
        """
        Constructor of the RecommenderSystem.

        :param ModelInitializer initializer: Currently ignored; a fresh
            ModelInitializer is always built from the configured
            hyperparameters.
        :param AbstractsPreprocessor abstracts_preprocessor: A preprocessor
            of abstracts; if None, one is built from the DataParser.
        :param int[][] ratings: Ratings matrix; if None, the matrix is
            obtained from the DataParser.
        :param config: Value handed to RecommenderConfiguration.
        :param boolean process_parser: A flag deciding whether to run
            DataParser.process() first.
        :param boolean verbose: A flag deciding whether to print progress.
        :param boolean load_matrices: A flag forwarded to every recommender
            (presumably to load previously saved matrices -- confirm).
        :param boolean dump_matrices: A flag for saving output matrices.
        :param boolean train_more: Forwarded to the collaborative
            filtering; train more after loading matrices.
        :raises NameError: If the configured error metric, content-based
            method, collaborative filtering method or recommender type is
            not one of the supported options.
        """
        if process_parser:
            DataParser.process()

        self.ratings = (numpy.array(DataParser.get_ratings_matrix())
                        if ratings is None else ratings)
        self.abstracts_preprocessor = (
            AbstractsPreprocessor(DataParser.get_abstracts(),
                                  *DataParser.get_word_distribution())
            if abstracts_preprocessor is None else abstracts_preprocessor)

        # Get configurations
        self.config = RecommenderConfiguration(config)
        self.set_hyperparameters(self.config.get_hyperparameters())
        self.set_options(self.config.get_options())

        # Set flags
        self._verbose = verbose
        self._dump_matrices = dump_matrices
        self._load_matrices = load_matrices
        self._train_more = train_more

        self.initializer = ModelInitializer(self.hyperparameters.copy(),
                                            self.n_iter, self._verbose)

        error_metric = self.config.get_error_metric()
        if error_metric != 'RMS':
            raise NameError(
                "Not a valid error metric %s. Only option is 'RMS'"
                % error_metric)
        self.evaluator = Evaluator(self.ratings, self.abstracts_preprocessor)

        # Positional arguments shared by every recommender constructor.
        shared_args = (self.initializer, self.evaluator,
                       self.hyperparameters, self.options, self._verbose,
                       self._load_matrices, self._dump_matrices)

        # Initialize content based.
        content_based_name = self.config.get_content_based()
        if content_based_name == 'None':
            self.content_based = ContentBased(*shared_args)
        elif content_based_name == 'LDA':
            self.content_based = LDARecommender(*shared_args)
        elif content_based_name == 'LDA2Vec':
            self.content_based = LDA2VecRecommender(*shared_args)
        else:
            raise NameError(
                "Not a valid content based %s. Options are 'None', "
                "'LDA', 'LDA2Vec'" % content_based_name)

        # Initialize collaborative filtering.
        collaborative_name = self.config.get_collaborative_filtering()
        if collaborative_name != 'ALS':
            raise NameError("Not a valid collaborative filtering %s. "
                            "Only option is 'ALS'" % collaborative_name)
        self.collaborative_filtering = CollaborativeFiltering(
            *(shared_args + (self._train_more,)))

        # Initialize recommender
        recommender_name = self.config.get_recommender()
        if recommender_name == 'itembased':
            self.recommender = self.content_based
        elif recommender_name == 'userbased':
            self.recommender = self.collaborative_filtering
        else:
            raise NameError("Invalid recommender type %s. "
                            "Only options are 'userbased' and 'itembased'"
                            % recommender_name)

    @overrides
    def set_options(self, options):
        """
        Set the options of the recommender. Namely n_iterations and k_folds.

        Only 'n_iterations' is read here (into ``self.n_iter``); a copy of
        the whole dictionary is kept in ``self.options``.

        :param dict options: A dictionary of the options.
        """
        self.n_iter = options['n_iterations']
        self.options = options.copy()

    @overrides
    def get_evaluation_report(self):
        """
        Return the evaluation report of the active recommender.
        """
        active_recommender = self.recommender
        return active_recommender.get_evaluation_report()

    @overrides
    def set_hyperparameters(self, hyperparameters):
        """
        Setter of the hyperparameters of the recommender.

        Stores n_factors and _lambda as attributes and keeps a copy of the
        whole dictionary in ``self.hyperparameters``.

        :param dict hyperparameters: hyperparameters of the recommender,
            contains _lambda and n_factors
        """
        self.n_factors = hyperparameters['n_factors']
        self._lambda = hyperparameters['_lambda']
        self.hyperparameters = hyperparameters.copy()

    @overrides
    def train(self):
        """
        Train the recommender on the given data.

        The content-based model is always trained first. When the active
        recommender is the collaborative filtering, it is then trained on
        a copy of the content-based document-topic distribution. Finally
        the active recommender's predictions are stored in
        ``self.predictions``. This method returns nothing.
        """
        verbose = self._verbose
        if verbose:
            print("Training content-based %s..." % self.content_based)
        self.content_based.train()

        assert (self.recommender == self.collaborative_filtering or
                self.recommender == self.content_based)

        if self.recommender == self.collaborative_filtering:
            theta = self.content_based.get_document_topic_distribution().copy()
            if verbose:
                print("Training collaborative-filtering %s..."
                      % self.collaborative_filtering)
            self.collaborative_filtering.train(theta)

        self.predictions = self.recommender.get_predictions()
        if verbose:
            print("done training...")