Beispiel #1
0
    def setUp(self):
        """
        Build the fixture used by each test.

        Stores the matrix dimensions, hyperparameters and options on the test
        case, replaces ``DataParser.get_ratings_matrix`` with a deterministic
        stand-in, trains a CollaborativeFiltering instance on the generated
        ratings and keeps its test data and predictions for the tests.
        """
        self.documents = 18
        self.users = 10
        # Plain locals so the nested stand-in does not depend on its own ``self``.
        n_docs = self.documents
        n_users = self.users
        self.n_iterations = 15
        self.k_folds = 3
        self.hyperparameters = {'n_factors': 5, '_lambda': 0.01}
        self.options = {'n_iterations': self.n_iterations, 'k_folds': self.k_folds}
        self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iterations)
        self.n_recommendations = 1

        def mock_get_ratings_matrix(self=None):
            # One row per user; a cell is 1 exactly when (article + user) is a multiple of 3.
            rows = []
            for user in range(n_users):
                rows.append([int((article + user) % 3 == 0) for article in range(n_docs)])
            return rows

        self.ratings_matrix = numpy.array(mock_get_ratings_matrix())
        DataParser.get_ratings_matrix = mock_get_ratings_matrix

        self.evaluator = Evaluator(self.ratings_matrix)
        recommender = CollaborativeFiltering(self.initializer, self.evaluator, self.hyperparameters,
                                             self.options, load_matrices=True)
        self.cf = recommender
        recommender.train()
        recommender.evaluator.k_folds = self.k_folds
        self.test_data = recommender.test_data
        self.predictions = recommender.get_predictions()
        self.rounded_predictions = recommender.rounded_predictions()
Beispiel #2
0
    def run_collaborative(self):
        """
        Train collaborative filtering and print a one-line summary.

        The recommender is built from this object's initializer, evaluator,
        hyperparameters, options and flags. The sequence returned by
        ``train()`` is unpacked positionally into the summary fields below.
        """
        recommender = CollaborativeFiltering(
            self.initializer, self.evaluator, self.hyperparameters,
            self.options, self.verbose, self.load_matrices, self.dump,
            self.train_more)

        metrics = recommender.train()
        # The fields are joined with ', ' to form the summary template.
        summary_fields = (
            'Summary: Test sum {:.2f}',
            'Train sum {:.2f}',
            'Final error {:.5f}',
            'train recall {:.5f}',
            'test recall {:.5f}',
            'recall@200 {:.5f}',
            'ratio {:.5f}',
            'mrr@5 {:.5f}',
            'ndcg@5 {:.5f}',
            'mrr@10 {:.5f}',
            'ndcg@10 {:.5f}',
        )
        print(', '.join(summary_fields).format(*metrics))
Beispiel #3
0
 def runTest(self):
     """
     Build a grid search around a collaborative-filtering recommender and
     run the key-generator, combinations-generator and grid-search checks on it.
     """
     ratings_evaluator = Evaluator(self.ratings_matrix)
     recommender = CollaborativeFiltering(self.initializer, ratings_evaluator,
                                          self.initial_config, self.options,
                                          load_matrices=True)
     search = GridSearch(recommender, self.hyperparameters, False)
     checks = (self.checkKeyGenerator,
               self.checkCombinationsGenerator,
               self.checkGridSearch)
     for check in checks:
         check(search)
Beispiel #4
0
    def run_collaborative(self):
        """
        Train collaborative filtering, then print two recall figures.

        After training, the evaluation report is requested (its result is not
        used here). The first printed value comes from ``calculate_recall`` on
        the ratings and the rounded predictions; the second from
        ``recall_at_x`` with x = 1.
        """
        recommender = CollaborativeFiltering(
            self.initializer, self.evaluator, self.hyperparameters,
            self.options, self.verbose, self.load_matrices, self.dump)

        recommender.train()
        recommender.get_evaluation_report()

        overall_recall = recommender.evaluator.calculate_recall(
            recommender.ratings, recommender.rounded_predictions())
        print(overall_recall)

        # rounded_predictions() is deliberately called again rather than reused.
        recall_at_one = recommender.evaluator.recall_at_x(
            1, recommender.get_predictions(), recommender.test_data,
            recommender.rounded_predictions())
        print(recall_at_one)
Beispiel #5
0
 def run_grid_search(self):
     """
     Run a grid search over a fixed hyperparameter grid and print each result.

     The grid covers five values of ``_lambda`` and five values of
     ``n_factors``. The first element returned by ``GridSearch.train()`` is
     not used here.
     """
     search_space = {
         '_lambda': [1e-5, 0.01, 0.1, 0.5, 10],
         'n_factors': list(range(100, 600, 100))
     }
     recommender = CollaborativeFiltering(
         self.initializer, self.evaluator, self.hyperparameters,
         self.options, self.verbose, self.load_matrices, self.dump,
         self.train_more)
     search = GridSearch(recommender, search_space, self.verbose)
     _best_params, every_result = search.train()
     for outcome in every_result:
         print(outcome)
 def runTest(self):
     """
     Check a collaborative-filtering recommender before and after training.

     Verifies the configured dimensions, the shape of the predictions, that
     ratings and rounded predictions lie in [0, 1] (within a small tolerance)
     with the expected shape, that recall lies in [0, 1], and that a single
     prediction for a random user/item pair is a ``numpy.float64``.
     """
     recommender = CollaborativeFiltering(self.initializer, self.evaluator, self.hyperparameters,
                                          self.options, load_matrices=False)
     self.assertEqual(recommender.n_factors, self.n_factors)
     self.assertEqual(recommender.n_items, self.documents)
     recommender.train()

     expected_shape = (self.users, self.documents)
     tolerance = 1e-6

     def check_unit_interval_matrix(matrix):
         # Values must lie in [0, 1] up to the tolerance and fill the full user x document grid.
         self.assertLessEqual(numpy.amax(matrix), 1 + tolerance)
         self.assertGreaterEqual(numpy.amin(matrix), -tolerance)
         self.assertTrue(matrix.shape == expected_shape)

     self.assertEqual(recommender.get_predictions().shape, expected_shape)
     self.assertTrue(isinstance(recommender, AbstractRecommender))

     ratings = recommender.get_ratings()
     check_unit_interval_matrix(ratings)
     check_unit_interval_matrix(recommender.rounded_predictions())

     recall = recommender.evaluator.calculate_recall(ratings, recommender.get_predictions())
     self.assertTrue(-tolerance <= recall <= 1 + tolerance)

     user_index = int(numpy.random.random() * self.users)
     item_index = int(numpy.random.random() * self.documents)
     single_prediction = recommender.predict(user_index, item_index)
     self.assertTrue(isinstance(single_prediction, numpy.float64))
Beispiel #7
0
    def __init__(self,
                 initializer=None,
                 abstracts_preprocessor=None,
                 ratings=None,
                 config=None,
                 process_parser=False,
                 verbose=False,
                 load_matrices=True,
                 dump_matrices=True,
                 train_more=True,
                 random_seed=False,
                 results_file_name='top_recommendations'):
        """
        Constructor of the RecommenderSystem.

        Loads the input data, reads the configuration and builds the evaluator,
        the content-based recommender and the collaborative-filtering
        recommender it describes. The compatibility checks of the
        collaborative-filtering section run before that component is built.

        :param ModelInitializer initializer: Not used by this constructor; a new
            ModelInitializer is always created from the configured hyperparameters.
        :param AbstractsPreprocessor abstracts_preprocessor: A preprocessor of abstracts;
            if None, one is built from the DataParser.
        :param int[][] ratings: Ratings matrix; if None, it is obtained from
            DataParser.get_ratings_matrix().
        :param config: Passed unchanged to RecommenderConfiguration.
        :param boolean process_parser: A flag deciding whether to call DataParser.process() first.
        :param boolean verbose: A flag deciding whether to print progress.
        :param boolean load_matrices: A flag forwarded to the recommenders
            (presumably whether stored matrices are loaded; confirm in the recommenders).
        :param boolean dump_matrices: A flag for saving output matrices.
        :param boolean train_more: train_more the collaborative filtering after loading matrices.
        :param boolean random_seed: A flag to determine if we will use random seed or not.
        :param str results_file_name: Top recommendations results' file name; '.dat' is appended.
        :raises NameError: If the error metric, content-based method, collaborative-filtering
            method or recommender type is not recognised, or if they are combined in a way
            this constructor rejects.
        """
        if process_parser:
            DataParser.process()

        if ratings is None:
            self.ratings = numpy.array(DataParser.get_ratings_matrix())
        else:
            self.ratings = ratings

        if abstracts_preprocessor is None:
            self.abstracts_preprocessor = AbstractsPreprocessor(
                DataParser.get_abstracts(),
                *DataParser.get_word_distribution())
        else:
            self.abstracts_preprocessor = abstracts_preprocessor

        # Get configurations
        self.config = RecommenderConfiguration(config)

        # Set flags
        self.results_file_name = results_file_name + '.dat'
        self._verbose = verbose
        self._dump_matrices = dump_matrices
        self._load_matrices = load_matrices
        self._train_more = train_more
        self._split_type = 'user'
        self._random_seed = random_seed

        self.set_hyperparameters(self.config.get_hyperparameters())
        self.set_options(self.config.get_options())

        self.initializer = ModelInitializer(self.hyperparameters.copy(),
                                            self.n_iter, self._verbose)

        error_metric = self.config.get_error_metric()
        if error_metric != 'RMS':
            raise NameError(
                "Not a valid error metric %s. Only option is 'RMS'" %
                error_metric)
        self.evaluator = Evaluator(self.ratings,
                                   self.abstracts_preprocessor,
                                   self._random_seed, self._verbose)

        # Initialize content based.
        content_based = self.config.get_content_based()
        if content_based == 'None':
            self.content_based = ContentBased(self.initializer, self.evaluator,
                                              self.hyperparameters,
                                              self.options, self._verbose,
                                              self._load_matrices,
                                              self._dump_matrices)
        elif content_based == 'LDA':
            self.content_based = LDARecommender(self.initializer,
                                                self.evaluator,
                                                self.hyperparameters,
                                                self.options, self._verbose,
                                                self._load_matrices,
                                                self._dump_matrices)
        elif content_based == 'LDA2Vec':
            self.content_based = LDA2VecRecommender(
                self.initializer, self.evaluator, self.hyperparameters,
                self.options, self._verbose, self._load_matrices,
                self._dump_matrices)
        else:
            raise NameError(
                "Not a valid content based %s. Options are 'None', "
                "'LDA', 'LDA2Vec'" % content_based)

        # Initialize collaborative filtering. Each branch validates the
        # combination first, so an invalid configuration fails before the
        # collaborative component is constructed.
        collaborative = self.config.get_collaborative_filtering()
        recommender_type = self.config.get_recommender()
        if collaborative == 'ALS':
            is_hybrid = recommender_type == 'hybrid'
            # NOTE(review): this check applies to every recommender type, although
            # the message mentions only the hybrid one - confirm the intended rule.
            if content_based == 'None':
                raise NameError(
                    "Not valid content based 'None' with hybrid recommender")
            self.collaborative_filtering = CollaborativeFiltering(
                self.initializer, self.evaluator, self.hyperparameters,
                self.options, self._verbose, self._load_matrices,
                self._dump_matrices, self._train_more, is_hybrid)
        elif collaborative == 'SDAE':
            if content_based != 'None':
                raise NameError(
                    "Not a valid content based %s with SDAE. You can only use 'None'"
                    % content_based)
            self.collaborative_filtering = SDAERecommender(
                self.initializer, self.evaluator, self.hyperparameters,
                self.options, self._verbose, self._load_matrices,
                self._dump_matrices)
        elif collaborative == 'None':
            if recommender_type != 'itembased':
                raise NameError(
                    "None collaborative filtering is only valid with itembased recommender type"
                )
            if content_based == 'None':
                raise NameError(
                    "Not valid content based 'None' with item-based recommender"
                )
            self.collaborative_filtering = None
        else:
            raise NameError("Not a valid collaborative filtering %s. "
                            "Only options are 'None', 'ALS', 'SDAE'" %
                            collaborative)

        # Initialize recommender
        if recommender_type == 'itembased':
            self.recommender = self.content_based
        elif recommender_type == 'userbased':
            self.recommender = self.collaborative_filtering
        elif recommender_type == 'hybrid':
            # In hybrid mode this object itself acts as the recommender.
            self.recommender = self
        else:
            raise NameError(
                "Invalid recommender type %s. "
                "Only options are 'userbased','itembased', and 'hybrid'" %
                recommender_type)
Beispiel #8
0
    def __init__(self, initializer=None, abstracts_preprocessor=None, ratings=None, config=None,
                 process_parser=False, verbose=False, load_matrices=True, dump_matrices=True, train_more=True):
        """
        Create a RecommenderSystem from input data and a configuration.

        :param ModelInitializer initializer: Not read by this constructor; a new
            ModelInitializer is always created from the configured hyperparameters.
        :param AbstractsPreprocessor abstracts_preprocessor: Preprocessor of abstracts;
            built from the DataParser when None.
        :param int[][] ratings: Ratings matrix; taken from
            DataParser.get_ratings_matrix() when None.
        :param config: Passed unchanged to RecommenderConfiguration.
        :param boolean process_parser: Whether to call DataParser.process() first.
        :param boolean verbose: Whether to print progress.
        :param boolean load_matrices: Flag forwarded to the recommenders.
        :param boolean dump_matrices: Flag for saving output matrices.
        :param boolean train_more: Flag forwarded to the collaborative filtering.
        :raises NameError: If the error metric, content-based method,
            collaborative-filtering method or recommender type is not recognised.
        """
        if process_parser:
            DataParser.process()

        self.ratings = numpy.array(DataParser.get_ratings_matrix()) if ratings is None else ratings

        if abstracts_preprocessor is None:
            abstracts = DataParser.get_abstracts()
            word_distribution = DataParser.get_word_distribution()
            abstracts_preprocessor = AbstractsPreprocessor(abstracts, *word_distribution)
        self.abstracts_preprocessor = abstracts_preprocessor

        # Read the configuration and apply hyperparameters and options.
        self.config = RecommenderConfiguration(config)
        self.set_hyperparameters(self.config.get_hyperparameters())
        self.set_options(self.config.get_options())

        # Remember the behaviour flags.
        self._verbose = verbose
        self._dump_matrices = dump_matrices
        self._load_matrices = load_matrices
        self._train_more = train_more

        self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iter, self._verbose)

        # Evaluator: 'RMS' is the only accepted error metric.
        metric = self.config.get_error_metric()
        if metric != 'RMS':
            raise NameError("Not a valid error metric %s. Only option is 'RMS'" % metric)
        self.evaluator = Evaluator(self.ratings, self.abstracts_preprocessor)

        # Content-based component: pick the class, then build it once.
        content_kind = self.config.get_content_based()
        if content_kind == 'None':
            content_class = ContentBased
        elif content_kind == 'LDA':
            content_class = LDARecommender
        elif content_kind == 'LDA2Vec':
            content_class = LDA2VecRecommender
        else:
            raise NameError("Not a valid content based %s. Options are 'None', "
                            "'LDA', 'LDA2Vec'" % content_kind)
        self.content_based = content_class(self.initializer, self.evaluator, self.hyperparameters,
                                           self.options, self._verbose, self._load_matrices,
                                           self._dump_matrices)

        # Collaborative-filtering component: only 'ALS' is accepted.
        collaborative_kind = self.config.get_collaborative_filtering()
        if collaborative_kind != 'ALS':
            raise NameError("Not a valid collaborative filtering %s. "
                            "Only option is 'ALS'" % collaborative_kind)
        self.collaborative_filtering = CollaborativeFiltering(
            self.initializer, self.evaluator, self.hyperparameters, self.options,
            self._verbose, self._load_matrices, self._dump_matrices, self._train_more)

        # Choose which component answers recommendation requests.
        recommender_kind = self.config.get_recommender()
        if recommender_kind == 'itembased':
            self.recommender = self.content_based
        elif recommender_kind == 'userbased':
            self.recommender = self.collaborative_filtering
        else:
            raise NameError("Invalid recommender type %s. "
                            "Only options are 'userbased' and 'itembased'" % recommender_kind)
Beispiel #9
0
class RecommenderSystem(AbstractRecommender):
    """
    Combines a content-based recommender and a collaborative-filtering
    recommender and delegates to whichever one the configuration selects.
    """
    def __init__(self, initializer=None, abstracts_preprocessor=None, ratings=None, config=None,
                 process_parser=False, verbose=False, load_matrices=True, dump_matrices=True, train_more=True):
        """
        Create a RecommenderSystem from input data and a configuration.

        :param ModelInitializer initializer: Not read by this constructor; a new
            ModelInitializer is always created from the configured hyperparameters.
        :param AbstractsPreprocessor abstracts_preprocessor: Preprocessor of abstracts;
            built from the DataParser when None.
        :param int[][] ratings: Ratings matrix; taken from
            DataParser.get_ratings_matrix() when None.
        :param config: Passed unchanged to RecommenderConfiguration.
        :param boolean process_parser: Whether to call DataParser.process() first.
        :param boolean verbose: Whether to print progress.
        :param boolean load_matrices: Flag forwarded to the recommenders.
        :param boolean dump_matrices: Flag for saving output matrices.
        :param boolean train_more: Flag forwarded to the collaborative filtering.
        :raises NameError: If the error metric, content-based method,
            collaborative-filtering method or recommender type is not recognised.
        """
        if process_parser:
            DataParser.process()

        self.ratings = numpy.array(DataParser.get_ratings_matrix()) if ratings is None else ratings

        if abstracts_preprocessor is None:
            abstracts = DataParser.get_abstracts()
            word_distribution = DataParser.get_word_distribution()
            abstracts_preprocessor = AbstractsPreprocessor(abstracts, *word_distribution)
        self.abstracts_preprocessor = abstracts_preprocessor

        # Read the configuration and apply hyperparameters and options.
        self.config = RecommenderConfiguration(config)
        self.set_hyperparameters(self.config.get_hyperparameters())
        self.set_options(self.config.get_options())

        # Remember the behaviour flags.
        self._verbose = verbose
        self._dump_matrices = dump_matrices
        self._load_matrices = load_matrices
        self._train_more = train_more

        self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iter, self._verbose)

        # Evaluator: 'RMS' is the only accepted error metric.
        metric = self.config.get_error_metric()
        if metric != 'RMS':
            raise NameError("Not a valid error metric %s. Only option is 'RMS'" % metric)
        self.evaluator = Evaluator(self.ratings, self.abstracts_preprocessor)

        # Content-based component: pick the class, then build it once.
        content_kind = self.config.get_content_based()
        if content_kind == 'None':
            content_class = ContentBased
        elif content_kind == 'LDA':
            content_class = LDARecommender
        elif content_kind == 'LDA2Vec':
            content_class = LDA2VecRecommender
        else:
            raise NameError("Not a valid content based %s. Options are 'None', "
                            "'LDA', 'LDA2Vec'" % content_kind)
        self.content_based = content_class(self.initializer, self.evaluator, self.hyperparameters,
                                           self.options, self._verbose, self._load_matrices,
                                           self._dump_matrices)

        # Collaborative-filtering component: only 'ALS' is accepted.
        collaborative_kind = self.config.get_collaborative_filtering()
        if collaborative_kind != 'ALS':
            raise NameError("Not a valid collaborative filtering %s. "
                            "Only option is 'ALS'" % collaborative_kind)
        self.collaborative_filtering = CollaborativeFiltering(
            self.initializer, self.evaluator, self.hyperparameters, self.options,
            self._verbose, self._load_matrices, self._dump_matrices, self._train_more)

        # Choose which component answers recommendation requests.
        recommender_kind = self.config.get_recommender()
        if recommender_kind == 'itembased':
            self.recommender = self.content_based
        elif recommender_kind == 'userbased':
            self.recommender = self.collaborative_filtering
        else:
            raise NameError("Invalid recommender type %s. "
                            "Only options are 'userbased' and 'itembased'" % recommender_kind)

    @overrides
    def set_options(self, options):
        """
        Store the options of the recommender.

        Keeps a copy of the dictionary and caches its 'n_iterations' entry
        as ``n_iter``.

        :param dict options: A dictionary of the options; must contain 'n_iterations'.
        """
        n_iterations = options['n_iterations']
        self.n_iter = n_iterations
        self.options = options.copy()

    @overrides
    def get_evaluation_report(self):
        """
        Return the evaluation report of the active recommender.
        """
        active = self.recommender
        return active.get_evaluation_report()

    @overrides
    def set_hyperparameters(self, hyperparameters):
        """
        Store the hyperparameters of the recommender.

        Keeps a copy of the dictionary and caches 'n_factors' and '_lambda'
        as attributes.

        :param dict hyperparameters: hyperparameters of the recommender, contains _lambda and n_factors
        """
        n_factors = hyperparameters['n_factors']
        regularization = hyperparameters['_lambda']
        self.n_factors = n_factors
        self._lambda = regularization
        self.hyperparameters = hyperparameters.copy()

    @overrides
    def train(self):
        """
        Train the recommender on the given data.

        The content-based component is always trained. When the active
        recommender is the collaborative filtering, it is then trained with a
        copy of the content-based document-topic distribution. Finally the
        active recommender's predictions are stored in ``self.predictions``.
        """
        if self._verbose:
            print("Training content-based %s..." % self.content_based)
        self.content_based.train()

        uses_collaborative = self.recommender == self.collaborative_filtering
        assert uses_collaborative or self.recommender == self.content_based
        if uses_collaborative:
            document_topics = self.content_based.get_document_topic_distribution().copy()
            if self._verbose:
                print("Training collaborative-filtering %s..." % self.collaborative_filtering)
            self.collaborative_filtering.train(document_topics)

        self.predictions = self.recommender.get_predictions()
        if self._verbose:
            print("done training...")