def setUp(self):
    """Prepare logging, the test database and the SVM user model.

    Configures debug-level logging, loads the YAML configuration,
    connects to the ``nyan_test`` database, populates it through
    ``fill_database()`` and stores on ``self``:

    * ``user_id`` -- id of the first user matching the fixture e-mail.
    * ``trainer`` -- a ``UserModelSVM`` for that user, backed by a
      ``TfidfFeatureExtractor`` built with the configured prefix.
    """
    log_format = '%(asctime)s : %(levelname)s : %(message)s'
    logging.basicConfig(format=log_format, level=logging.DEBUG)

    config_path = ("/media/sdc1/Aptana Studio 3 Workspace"
                   "/configs/config.yaml")
    settings = load_config(file_path=config_path,
                           logger=logger,
                           exit_with_error=True)

    # Use the dedicated test database rather than the one named in the
    # configuration file.
    connect("nyan_test", port=20545)
    fill_database()

    account = User.objects(email=u'*****@*****.**').first()
    self.user_id = account.id

    tfidf_extractor = TfidfFeatureExtractor(prefix=settings['prefix'])
    self.trainer = UserModelSVM(self.user_id, extractor=tfidf_extractor)
Example #2
0
 #             "Evaluationset: %d (read: %d, unread: %d).") %
 #            (len(training_set_read)+len(training_set_unread), 
 #             len(training_set_read), 
 #             len(training_set_unread),
 #             len(evaluation_set_read)+len(evaluation_set_unread), 
 #             len(evaluation_set_read), 
 #             len(evaluation_set_unread)))
 
 #learn on subset
 #user_model = UserModelBayes(user_id = user.id,
 #                            extractor = feature_extractor)
 
 #user_model = UserModelCentroid(user_id = user.id,
 #                               extractor = feature_extractor)
 
 user_model = UserModelSVM(user_id = user.id,
                           extractor = feature_extractor)
 
 #user_model = UserModelTree(user_id = user.id,
 #                           extractor = feature_extractor)
 user_model.set_samples_sizes(p_synthetic, p_majority)
 #user_model.set_samples_sizes(p_synthetic_samples = None, 
 #                             p_majority_samples = None)
 
 #user_model = UserModelMeta(user_id = user.id,
 #                           extractor = feature_extractor)
 
 user_model.train(read_article_ids = training_set_read, 
                  unread_article_ids = training_set_unread)
 
 #Set y_true
 y_true = np.empty(shape=(len(evaluation_set_read) + len(evaluation_set_unread)))
class UserModelSVMTest(unittest.TestCase):
    """Unit tests for the normalization and persistence of UserModelSVM.

    Every test runs against the ``nyan_test`` database, which is populated
    in ``setUp`` and emptied again in ``tearDown``.
    """

    def setUp(self):
        """Set up logging, the test database and the model under test.

        Stores ``self.user_id`` (id of the first user matching the fixture
        e-mail) and ``self.trainer`` (a ``UserModelSVM`` for that user using
        a ``TfidfFeatureExtractor``).
        """
        logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                            level=logging.DEBUG)

        config = load_config(file_path=("/media/sdc1/Aptana Studio 3 Workspace"
                                        "/configs/config.yaml"),
                             logger=logger,
                             exit_with_error=True)

        # Connect to the dedicated test database (not the one named in the
        # configuration file) and populate it.
        connect("nyan_test", port=20545)
        fill_database()

        self.user_id = User.objects(email=u'*****@*****.**').first().id
        feature_extractor = TfidfFeatureExtractor(prefix=config['prefix'])
        self.trainer = UserModelSVM(self.user_id, extractor=feature_extractor)

    def tearDown(self):
        """Remove everything ``setUp`` put into the test database."""
        clear_database()

    def test_mean_std_deviation(self):
        """Per-column mean and deviation end up in ``theta_`` / ``sigma_``."""
        X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [3, 0], [3, 2]])

        self.trainer._calculate_mean_and_std_deviation(X)

        self.assertAlmostEqual(self.trainer.theta_[0], 0.16666, 4)
        self.assertAlmostEqual(self.trainer.theta_[1], -0.16666, 4)
        self.assertAlmostEqual(self.trainer.sigma_[0], 2.33927, 4)
        self.assertAlmostEqual(self.trainer.sigma_[1], 1.3437, 4)

    def test_normalize(self):
        """``_normalize`` applies the stored ``theta_`` and ``sigma_``."""
        X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [3, 0], [3, 2]])

        self.trainer.theta_ = np.array([1, -2], dtype=np.float32)
        self.trainer.sigma_ = np.array([2, 1], dtype=np.float32)
        X = self.trainer._normalize(X)

        # (-2 - 1) / 2 == -1.5, exactly representable as a float.
        self.assertEqual(X[1, 0], -1.5)

    def test_normalize_no_theta(self):
        """``_normalize`` raises AttributeError while parameters are missing."""
        X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [3, 0], [3, 2]])

        # Neither theta_ nor sigma_ has been set yet.
        self.assertRaises(AttributeError, self.trainer._normalize, X)

        # Set theta_ only; sigma_ is still missing.
        self.trainer.theta_ = np.array([1, -2], dtype=np.float32)

        self.assertRaises(AttributeError, self.trainer._normalize, X)

    def test_save_load_theta_sigma(self):
        """``theta_`` and ``sigma_`` survive a ``save()`` / ``load()`` cycle."""
        X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [3, 0], [3, 2]])

        self.trainer._calculate_mean_and_std_deviation(X)

        # Placeholder classifier; presumably save() expects clf to be set --
        # TODO confirm against UserModelSVM.save().
        self.trainer.clf = "dummy"

        # Snapshot copies so the expectation cannot alias whatever load()
        # writes back onto the trainer.
        expected_theta = np.array(self.trainer.theta_)
        expected_sigma = np.array(self.trainer.sigma_)

        self.trainer.save()
        self.trainer.load()

        # Compare element-wise. The previous ``a.all() == b.all()`` check
        # reduced each array to a single boolean and therefore passed for
        # any pair of arrays without zero entries.
        np.testing.assert_allclose(self.trainer.theta_, expected_theta)
        np.testing.assert_allclose(self.trainer.sigma_, expected_sigma)

    @unittest.skip("No ranking implemented yet")
    def test_rank(self):
        """Ranking separates the read fixture article from the unread one."""
        self.trainer.train()

        unread_doc = Article.objects(headline=u"Sony = Bad").first()
        read_doc = Article.objects(headline=u"Apple").first()

        rank_unread_doc = self.trainer.rank(unread_doc)
        rank_read_doc = self.trainer.rank(read_doc)

        # NOTE(review): the expected labels come from UserModelBayes although
        # the model under test is UserModelSVM -- confirm this is intended
        # before enabling the test.
        self.assertEqual(rank_unread_doc, UserModelBayes.UNREAD)
        self.assertEqual(rank_read_doc, UserModelBayes.READ)