예제 #1
0
    def setUp(self):
        """Prepare the test database and a TF-IDF backed ``UserModelBayes`` trainer.

        Stores the id of the fixture user in ``self.user_id`` and the trainer
        under test in ``self.trainer``.
        """
        log_format = '%(asctime)s : %(levelname)s : %(message)s'
        logging.basicConfig(format=log_format, level=logging.DEBUG)

        settings = load_config(
            file_path="/vagrant/config.yaml",
            logger=logger,
            exit_with_error=True,
        )

        # The tests talk to the local "nyan_test" database; the database
        # section of the loaded configuration is deliberately not used here.
        connect("nyan_test", port=27017)
        fill_database()  # presumably inserts the fixture user and articles

        fixture_user = User.objects(email=u'*****@*****.**').first()
        self.user_id = fixture_user.id

        # Only the 'prefix' entry of the configuration is needed by the
        # TF-IDF feature extractor handed to the trainer.
        self.trainer = UserModelBayes(
            self.user_id,
            extractor=TfidfFeatureExtractor(prefix=settings['prefix']),
        )
예제 #2
0
class UserModelBayesTest(unittest.TestCase):
    """Tests for ``UserModelBayes`` that run against the ``nyan_test`` database.

    Every test gets a freshly filled database (``setUp``) which is cleared
    again afterwards (``tearDown``). All tests are currently marked as skipped.
    """

    def setUp(self):
        """Fill the test database and build the trainer under test.

        Sets ``self.user_id`` to the id of the fixture user and
        ``self.trainer`` to a ``UserModelBayes`` using a TF-IDF extractor.
        """
        logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)

        config = load_config(file_path="/vagrant/config.yaml", logger=logger, exit_with_error=True)

        # Connect to the dedicated test database; the database settings from
        # the configuration file are intentionally not used by the tests.
        connect("nyan_test", port=27017)
        fill_database()

        self.user_id = User.objects(email=u'*****@*****.**').first().id
        feature_extractor = TfidfFeatureExtractor(prefix=config['prefix'])
        self.trainer = UserModelBayes(self.user_id, extractor=feature_extractor)

    def tearDown(self):
        """Call ``clear_database()`` after each test (presumably drops the fixtures)."""
        clear_database()

    @unittest.skip("training")
    def test_save_load(self):
        """The classifier restored by ``load()`` has the parameters that were saved."""
        # Local import: keeps this fix self-contained without touching the
        # module's import block.
        import numpy as np

        self.trainer.train()

        # Snapshot the fitted parameters before the round trip so the check
        # stays meaningful even if load() were to reuse the same classifier
        # object instead of creating a new one.
        trained = self.trainer.clf
        expected_sigma = np.array(trained.sigma_, copy=True)
        expected_theta = np.array(trained.theta_, copy=True)

        self.trainer.save()
        self.trainer.load()

        # Compare element-wise. ndarray.all() collapses an array to a single
        # bool, so comparing the .all() results of two arrays would pass for
        # almost any pair of classifiers.
        np.testing.assert_array_equal(self.trainer.clf.sigma_, expected_sigma)
        np.testing.assert_array_equal(self.trainer.clf.theta_, expected_theta)

    @unittest.skip("training")
    def test_get_unread(self):
        """``_get_unread()`` yields exactly one article, headlined "Apple = Bad"."""
        unread_articles = self.trainer._get_unread()

        headlines = [a.headline for a in unread_articles]

        self.assertIn(u"Apple = Bad", headlines)
        self.assertNotIn(u"Apple", headlines)
        self.assertEqual(len(headlines), 1)

    @unittest.skip("ranking")
    def test_rank(self):
        """After training, ``rank()`` returns UNREAD / READ for the two fixture articles."""
        self.trainer.train()

        unread_doc = Article.objects(headline=u"Sony = Bad").first()
        read_doc = Article.objects(headline=u"Apple").first()

        rank_unread_doc = self.trainer.rank(unread_doc)
        rank_read_doc = self.trainer.rank(read_doc)

        self.assertEqual(rank_unread_doc, UserModelBayes.UNREAD)
        self.assertEqual(rank_read_doc, UserModelBayes.READ)