Example #1
0
class TestcaseBase(unittest.TestCase):
    """
    Common fixture for the recommender tests: builds a small synthetic ratings
    matrix and a trained CollaborativeFiltering instance on top of it.
    """
    def setUp(self):
        """
        Build the synthetic data, patch DataParser and train the recommender.

        Runs before every test method.
        """
        n_users, n_documents = 10, 18
        self.documents, self.users = n_documents, n_users
        self.n_iterations = 15
        self.k_folds = 3
        self.hyperparameters = {'n_factors': 5, '_lambda': 0.01}
        self.options = {'n_iterations': self.n_iterations, 'k_folds': self.k_folds}
        self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iterations)
        self.n_recommendations = 1

        def mock_get_ratings_matrix(self=None):
            # One row per user; an entry is 1 exactly when (article + user) is divisible by 3.
            matrix = []
            for user in range(n_users):
                matrix.append([1 if (article + user) % 3 == 0 else 0 for article in range(n_documents)])
            return matrix

        self.ratings_matrix = numpy.array(mock_get_ratings_matrix())
        # NOTE(review): the replacement is installed on the DataParser class itself and
        # is not undone anywhere in this fixture - confirm no other test needs the original.
        DataParser.get_ratings_matrix = mock_get_ratings_matrix

        self.evaluator = Evaluator(self.ratings_matrix)
        self.cf = CollaborativeFiltering(self.initializer, self.evaluator, self.hyperparameters,
                                         self.options, load_matrices=True)
        trained = self.cf
        trained.train()
        # k_folds is (re)assigned on the evaluator only after training has finished.
        trained.evaluator.k_folds = self.k_folds
        self.test_data = trained.test_data
        self.predictions = trained.get_predictions()
        self.rounded_predictions = trained.rounded_predictions()
Example #2
0
    def run_collaborative(self):
        """
        Train a collaborative-filtering recommender and print two recall values.

        The first printed value is calculate_recall over the recommender's ratings
        and its rounded predictions; the second is recall_at_x with x = 1.
        """
        recommender = CollaborativeFiltering(self.initializer, self.evaluator, self.hyperparameters,
                                             self.options, self.verbose, self.load_matrices, self.dump)
        recommender.train()
        # The report is requested but its return value is not used here.
        recommender.get_evaluation_report()

        overall_recall = recommender.evaluator.calculate_recall(recommender.ratings,
                                                                recommender.rounded_predictions())
        print(overall_recall)

        recall_at_one = recommender.evaluator.recall_at_x(1, recommender.get_predictions(),
                                                          recommender.test_data,
                                                          recommender.rounded_predictions())
        print(recall_at_one)
Example #3
0
 def runTest(self):
     """
     Check the basic contract of CollaborativeFiltering after training.

     Verifies the configured dimensions, the shapes of the predictions, ratings
     and rounded predictions, that ratings / rounded predictions / recall stay
     within [0, 1] up to a small tolerance, and that a single prediction is a
     numpy.float64.
     """
     # Tolerance used for every range check below.
     eps = 1e-6
     cf = CollaborativeFiltering(self.initializer, self.evaluator, self.hyperparameters,
                                 self.options, load_matrices=False)
     self.assertEqual(cf.n_factors, self.n_factors)
     self.assertEqual(cf.n_items, self.documents)
     cf.train()
     shape = (self.users, self.documents)
     self.assertEqual(cf.get_predictions().shape, shape)
     # Dedicated assertions report the offending value on failure,
     # unlike assertTrue(<expression>) which only reports "False is not true".
     self.assertIsInstance(cf, AbstractRecommender)
     ratings = cf.get_ratings()
     self.assertLessEqual(numpy.amax(ratings), 1 + eps)
     self.assertGreaterEqual(numpy.amin(ratings), -eps)
     self.assertEqual(ratings.shape, shape)
     rounded_predictions = cf.rounded_predictions()
     self.assertLessEqual(numpy.amax(rounded_predictions), 1 + eps)
     self.assertGreaterEqual(numpy.amin(rounded_predictions), -eps)
     self.assertEqual(rounded_predictions.shape, shape)
     recall = cf.evaluator.calculate_recall(ratings, cf.get_predictions())
     self.assertGreaterEqual(recall, -eps)
     self.assertLessEqual(recall, 1 + eps)
     # Spot-check one randomly chosen (user, item) pair.
     random_user = int(numpy.random.random() * self.users)
     random_item = int(numpy.random.random() * self.documents)
     random_prediction = cf.predict(random_user, random_item)
     self.assertIsInstance(random_prediction, numpy.float64)
Example #4
0
    def run_collaborative(self):
        """
        Train collaborative filtering and print a one-line summary of the results.

        The values returned by train() are unpacked positionally into the eleven
        placeholders of the summary template.
        """
        recommender = CollaborativeFiltering(self.initializer, self.evaluator, self.hyperparameters,
                                             self.options, self.verbose, self.load_matrices,
                                             self.dump, self.train_more)
        results = recommender.train()

        # One entry per reported value; joined with ', ' to form the full template.
        summary_fields = [
            'Summary: Test sum {:.2f}',
            'Train sum {:.2f}',
            'Final error {:.5f}',
            'train recall {:.5f}',
            'test recall {:.5f}',
            'recall@200 {:.5f}',
            'ratio {:.5f}',
            'mrr@5 {:.5f}',
            'ndcg@5 {:.5f}',
            'mrr@10 {:.5f}',
            'ndcg@10 {:.5f}',
        ]
        print(', '.join(summary_fields).format(*results))
Example #5
0
class RecommenderSystem(AbstractRecommender):
    """
    A class that will combine the content-based and collaborative-filtering,
    in order to provide the main functionalities of recommendations.
    """
    def __init__(self, initializer=None, abstracts_preprocessor=None, ratings=None, config=None,
                 process_parser=False, verbose=False, load_matrices=True, dump_matrices=True, train_more=True):
        """
        Assemble the recommender system from the configuration.

        :param ModelInitializer initializer: A model initializer.
            NOTE(review): this argument is not read below; a new ModelInitializer
            is always created - confirm that this is intended.
        :param AbstractsPreprocessor abstracts_preprocessor: A preprocessor of abstracts; if None,
            one is built from DataParser.get_abstracts() and DataParser.get_word_distribution().
        :param int[][] ratings: Ratings matrix; if None, it is taken from DataParser.get_ratings_matrix().
        :param config: Passed to RecommenderConfiguration.
        :param boolean process_parser: A flag deciding whether DataParser.process() is called first.
        :param boolean verbose: A flag deciding whether to print progress.
        :param boolean load_matrices: A flag forwarded to the content-based and collaborative recommenders.
        :param boolean dump_matrices: A flag for saving output matrices, forwarded to both recommenders.
        :param boolean train_more: A flag forwarded to the collaborative filtering only.
        :raises NameError: If the configured error metric, content-based method,
            collaborative filtering method or recommender type is not recognised.
        """
        if process_parser:
            DataParser.process()

        self.ratings = numpy.array(DataParser.get_ratings_matrix()) if ratings is None else ratings

        if abstracts_preprocessor is not None:
            self.abstracts_preprocessor = abstracts_preprocessor
        else:
            self.abstracts_preprocessor = AbstractsPreprocessor(DataParser.get_abstracts(),
                                                                *DataParser.get_word_distribution())

        # Read the configuration and derive hyperparameters / options from it.
        self.config = RecommenderConfiguration(config)
        self.set_hyperparameters(self.config.get_hyperparameters())
        self.set_options(self.config.get_options())

        # Remember the behaviour flags.
        self._verbose = verbose
        self._dump_matrices = dump_matrices
        self._load_matrices = load_matrices
        self._train_more = train_more

        self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iter, self._verbose)

        # Evaluator: 'RMS' is the only accepted error metric.
        error_metric = self.config.get_error_metric()
        if error_metric != 'RMS':
            raise NameError("Not a valid error metric %s. Only option is 'RMS'" % error_metric)
        self.evaluator = Evaluator(self.ratings, self.abstracts_preprocessor)

        # Positional arguments that every underlying recommender receives.
        shared_args = (self.initializer, self.evaluator, self.hyperparameters, self.options,
                       self._verbose, self._load_matrices, self._dump_matrices)

        # Content-based recommender.
        content_based_kind = self.config.get_content_based()
        if content_based_kind == 'None':
            self.content_based = ContentBased(*shared_args)
        elif content_based_kind == 'LDA':
            self.content_based = LDARecommender(*shared_args)
        elif content_based_kind == 'LDA2Vec':
            self.content_based = LDA2VecRecommender(*shared_args)
        else:
            raise NameError("Not a valid content based %s. Options are 'None', "
                            "'LDA', 'LDA2Vec'" % content_based_kind)

        # Collaborative filtering: 'ALS' is the only accepted method.
        collaborative_kind = self.config.get_collaborative_filtering()
        if collaborative_kind != 'ALS':
            raise NameError("Not a valid collaborative filtering %s. "
                            "Only option is 'ALS'" % collaborative_kind)
        self.collaborative_filtering = CollaborativeFiltering(*(shared_args + (self._train_more,)))

        # Pick which of the two acts as the main recommender.
        recommender_kind = self.config.get_recommender()
        if recommender_kind == 'itembased':
            self.recommender = self.content_based
        elif recommender_kind == 'userbased':
            self.recommender = self.collaborative_filtering
        else:
            raise NameError("Invalid recommender type %s. "
                            "Only options are 'userbased' and 'itembased'" % recommender_kind)

    @overrides
    def set_options(self, options):
        """
        Store the recommender options.

        The 'n_iterations' entry is kept separately as n_iter, and a copy of the
        whole dictionary is kept as options.

        :param dict options: A dictionary of the options; must contain 'n_iterations'.
        """
        n_iterations = options['n_iterations']
        self.n_iter = n_iterations
        # Keep a copy so the caller's dictionary is not shared with this object.
        self.options = options.copy()

    @overrides
    def get_evaluation_report(self):
        """
        Delegate to the evaluation report of the active recommender.

        :returns: Whatever self.recommender.get_evaluation_report() returns.
        """
        active_recommender = self.recommender
        return active_recommender.get_evaluation_report()

    @overrides
    def set_hyperparameters(self, hyperparameters):
        """
        Store the hyperparameters of the recommender.

        'n_factors' and '_lambda' are exposed as attributes of the same name, and
        a copy of the whole dictionary is kept as hyperparameters.

        :param dict hyperparameters: hyperparameters of the recommender, contains _lambda and n_factors
        """
        # Attribute names match the dictionary keys; assigned in this order.
        for key in ('n_factors', '_lambda'):
            setattr(self, key, hyperparameters[key])
        self.hyperparameters = hyperparameters.copy()

    @overrides
    def train(self):
        """
        Train the content-based model and, if it is the active recommender,
        the collaborative filtering as well; then store the predictions.

        The collaborative filtering is trained on a copy of the content-based
        document-topic distribution.

        :returns: The original documentation promises the error of the predictions (float).
            NOTE(review): no return statement is visible in this part of the method - confirm.
        """
        verbose = self._verbose
        if verbose:
            print("Training content-based %s..." % self.content_based)
        self.content_based.train()

        uses_collaborative = self.recommender == self.collaborative_filtering
        assert uses_collaborative or self.recommender == self.content_based
        if uses_collaborative:
            theta = self.content_based.get_document_topic_distribution().copy()
            if verbose:
                print("Training collaborative-filtering %s..." % self.collaborative_filtering)
            self.collaborative_filtering.train(theta)

        self.predictions = self.recommender.get_predictions()
        if verbose:
            print("done training...")