Exemple #1
0
                y_true[: len(evaluation_set_read)] = user_model.READ
                y_true[len(evaluation_set_read) :] = user_model.UNREAD

                # Set y_pred
                y_pred = np.empty(shape=(y_true.shape[0]))
                predicted_interesting_headlines = list()
                actual_interesting_headlines = list()

                # predict with other subset and record measures
                for i, article_id in enumerate(evaluation_set_read):
                    article = Article.objects(id=article_id).first()
                    if article is None:
                        continue

                    # Predict and record result
                    result = user_model.rank(doc=article)
                    y_pred[i] = result

                    # Record headline
                    if result == user_model.READ:
                        actual_interesting_headlines.append(article.headline)

                    actual_interesting_headlines.append(article.headline)

                for i, article_id in enumerate(evaluation_set_unread, start=len(evaluation_set_read)):
                    article = Article.objects(id=article_id).first()
                    if article is None:
                        continue

                    # Predict and record result
                    result = user_model.rank(doc=article)
class UserModelSVMTest(unittest.TestCase):
    """Tests for the feature-normalization helpers and persistence of UserModelSVM.

    Every test runs against the ``nyan_test`` database, which is populated in
    ``setUp`` via ``fill_database()`` and emptied again in ``tearDown``.
    """

    def setUp(self):
        """Connect to the test database, load fixtures and build the trainer."""
        logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)

        config = load_config(file_path="/vagrant/config.yaml", logger=logger, exit_with_error=True)

        # Connect to the test database (not the one named in the config).
        connect("nyan_test", port=27017)
        fill_database()
        # Alternative: connect to the configured database instead.
        #connect(config['database']['db-name'],
        #        username= config['database']['user'],
        #        password= config['database']['passwd'],
        #        port = config['database']['port'])

        # Fail with a readable message instead of an AttributeError on None
        # when the fixture user is missing.
        user = User.objects(email=u'*****@*****.**').first()
        self.assertIsNotNone(user, "fixture user not found in test database")
        self.user_id = user.id

        #feature_extractor = EsaFeatureExtractor(prefix = config['prefix'])
        feature_extractor = TfidfFeatureExtractor(prefix=config['prefix'])
        self.trainer = UserModelSVM(self.user_id, extractor=feature_extractor)

    def tearDown(self):
        """Remove everything ``fill_database()`` inserted."""
        clear_database()

    def test_mean_std_deviation(self):
        """theta_ / sigma_ must hold the per-column mean / population std of X."""
        X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [3, 0], [3, 2]])

        self.trainer._calculate_mean_and_std_deviation(X)

        # Column sums are 1 and -1 over six rows -> means of +/- 1/6.
        self.assertAlmostEqual(self.trainer.theta_[0], 0.16666, places=4)
        self.assertAlmostEqual(self.trainer.theta_[1], -0.16666, places=4)
        self.assertAlmostEqual(self.trainer.sigma_[0], 2.33927, places=4)
        self.assertAlmostEqual(self.trainer.sigma_[1], 1.3437, places=4)

    def test_normalize(self):
        """_normalize must apply the preset theta_ and sigma_ to X."""
        X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [3, 0], [3, 2]])

        self.trainer.theta_ = np.array([1, -2], dtype=np.float32)
        self.trainer.sigma_ = np.array([2, 1], dtype=np.float32)
        X = self.trainer._normalize(X)

        # (-2 - 1) / 2 == -1.5, exactly representable as a float.
        self.assertEqual(X[1, 0], -1.5)

    def test_normalize_no_theta(self):
        """_normalize must raise AttributeError while theta_ or sigma_ is unset."""
        X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [3, 0], [3, 2]])

        # Neither theta_ nor sigma_ has been set yet.
        with self.assertRaises(AttributeError):
            self.trainer._normalize(X)

        # Dummy-set theta_ but not sigma_.
        self.trainer.theta_ = np.array([1, -2], dtype=np.float32)

        with self.assertRaises(AttributeError):
            self.trainer._normalize(X)

    def test_save_load_theta_sigma(self):
        """theta_ and sigma_ must survive a save() / load() round-trip."""
        X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [3, 0], [3, 2]])

        self.trainer._calculate_mean_and_std_deviation(X)

        # A placeholder classifier; only the normalization parameters are
        # inspected after the round-trip.
        self.trainer.clf = "dummy"

        # Snapshot copies, so the comparison stays meaningful even if load()
        # were to update the existing arrays in place.
        expected_theta = np.array(self.trainer.theta_)
        expected_sigma = np.array(self.trainer.sigma_)

        self.trainer.save()
        self.trainer.load()

        # Check normalization parameters element-wise. Comparing the results
        # of ndarray.all() would only compare two booleans and pass for any
        # pair of arrays without zeros.
        np.testing.assert_array_almost_equal(self.trainer.theta_, expected_theta)
        np.testing.assert_array_almost_equal(self.trainer.sigma_, expected_sigma)

    @unittest.skip("No ranking implemented yet")
    def test_rank(self):
        """After training, a read article ranks READ and an unread one UNREAD."""
        self.trainer.train()

        unread_doc = Article.objects(headline=u"Sony = Bad").first()
        read_doc = Article.objects(headline=u"Apple").first()

        rank_unread_doc = self.trainer.rank(unread_doc)
        rank_read_doc = self.trainer.rank(read_doc)

        # NOTE(review): the expected labels come from UserModelBayes although
        # the trainer is a UserModelSVM -- confirm both share the same
        # READ/UNREAD constants before enabling this test.
        self.assertEqual(rank_unread_doc, UserModelBayes.UNREAD)
        self.assertEqual(rank_read_doc, UserModelBayes.READ)