def setUp(self):
    """Prepare logging, the test database and the SVM trainer under test.

    Configures debug logging, loads the YAML configuration, connects to
    the ``nyan_test`` database and calls ``fill_database()``. Afterwards
    ``self.user_id`` holds the id of the fixture user and ``self.trainer``
    holds a ``UserModelSVM`` built for that user.
    """
    log_format = '%(asctime)s : %(levelname)s : %(message)s'
    logging.basicConfig(format=log_format, level=logging.DEBUG)

    config_path = ("/media/sdc1/Aptana Studio 3 Workspace"
                   "/configs/config.yaml")
    settings = load_config(file_path=config_path,
                           logger=logger,
                           exit_with_error=True)

    # The tests connect to the test database on a fixed port; the
    # database section of the loaded configuration is not used here.
    connect("nyan_test", port=20545)
    fill_database()

    fixture_user = User.objects(email=u'*****@*****.**').first()
    self.user_id = fixture_user.id

    # TF-IDF features are used for the trainer.
    # Alternative extractor: EsaFeatureExtractor(prefix=settings['prefix'])
    extractor = TfidfFeatureExtractor(prefix=settings['prefix'])
    self.trainer = UserModelSVM(self.user_id, extractor=extractor)
# "Evaluationset: %d (read: %d, unread: %d).") % # (len(training_set_read)+len(training_set_unread), # len(training_set_read), # len(training_set_unread), # len(evaluation_set_read)+len(evaluation_set_unread), # len(evaluation_set_read), # len(evaluation_set_unread))) #learn on subset #user_model = UserModelBayes(user_id = user.id, # extractor = feature_extractor) #user_model = UserModelCentroid(user_id = user.id, # extractor = feature_extractor) user_model = UserModelSVM(user_id = user.id, extractor = feature_extractor) #user_model = UserModelTree(user_id = user.id, # extractor = feature_extractor) user_model.set_samples_sizes(p_synthetic, p_majority) #user_model.set_samples_sizes(p_synthetic_samples = None, # p_majority_samples = None) #user_model = UserModelMeta(user_id = user.id, # extractor = feature_extractor) user_model.train(read_article_ids = training_set_read, unread_article_ids = training_set_unread) #Set y_true y_true = np.empty(shape=(len(evaluation_set_read) + len(evaluation_set_unread)))
class UserModelSVMTest(unittest.TestCase):
    """Tests for the normalization helpers and persistence of UserModelSVM.

    Each test runs against the ``nyan_test`` database: ``setUp`` connects
    and calls ``fill_database()``, ``tearDown`` calls ``clear_database()``.
    """

    @staticmethod
    def _sample_matrix():
        """Return a fresh 6x2 integer sample matrix shared by the tests."""
        return np.array([[-1, -1], [-2, -1], [-3, -2],
                         [1, 1], [3, 0], [3, 2]])

    def setUp(self):
        """Set up logging, the test database and ``self.trainer``."""
        logging.basicConfig(
            format='%(asctime)s : %(levelname)s : %(message)s',
            level=logging.DEBUG)

        config = load_config(
            file_path=("/media/sdc1/Aptana Studio 3 Workspace"
                       "/configs/config.yaml"),
            logger=logger,
            exit_with_error=True)

        # Connect to the test database. The database section of the
        # loaded configuration is not used here.
        connect("nyan_test", port=20545)
        fill_database()

        self.user_id = User.objects(email=u'*****@*****.**').first().id

        # Alternative extractor: EsaFeatureExtractor(prefix=config['prefix'])
        feature_extractor = TfidfFeatureExtractor(prefix=config['prefix'])
        self.trainer = UserModelSVM(self.user_id,
                                    extractor=feature_extractor)

    def tearDown(self):
        """Remove the data that ``setUp`` put into the test database."""
        clear_database()

    def test_mean_std_deviation(self):
        """Column-wise mean and standard deviation land in theta_/sigma_."""
        X = self._sample_matrix()

        self.trainer._calculate_mean_and_std_deviation(X)

        self.assertAlmostEqual(self.trainer.theta_[0], 0.16666, 4)
        self.assertAlmostEqual(self.trainer.theta_[1], -0.16666, 4)
        self.assertAlmostEqual(self.trainer.sigma_[0], 2.33927, 4)
        self.assertAlmostEqual(self.trainer.sigma_[1], 1.3437, 4)

    def test_normalize(self):
        """_normalize uses the theta_/sigma_ currently set on the trainer."""
        X = self._sample_matrix()

        self.trainer.theta_ = np.array([1, -2], dtype=np.float32)
        self.trainer.sigma_ = np.array([2, 1], dtype=np.float32)

        X = self.trainer._normalize(X)

        # Row 1, column 0: (-2 - 1) / 2 == -1.5
        self.assertEqual(X[1, 0], -1.5)

    def test_normalize_no_theta(self):
        """_normalize raises AttributeError while theta_ or sigma_ is unset."""
        X = self._sample_matrix()

        with self.assertRaises(AttributeError):
            self.trainer._normalize(X)

        # Set theta_ but not sigma_: normalization must still fail.
        self.trainer.theta_ = np.array([1, -2], dtype=np.float32)

        with self.assertRaises(AttributeError):
            self.trainer._normalize(X)

    def test_save_load_theta_sigma(self):
        """theta_ and sigma_ keep their values across save() and load()."""
        X = self._sample_matrix()

        self.trainer._calculate_mean_and_std_deviation(X)
        # Placeholder classifier; presumably save() needs clf to be set
        # -- TODO confirm against UserModelSVM.save.
        self.trainer.clf = "dummy"

        # Copy the values so the comparison below cannot be satisfied by
        # the trainer handing back the very same array objects.
        expected_theta = np.copy(self.trainer.theta_)
        expected_sigma = np.copy(self.trainer.sigma_)

        self.trainer.save()
        self.trainer.load()

        # Compare element-wise. Comparing ``a.all()`` with ``b.all()``
        # only compares two booleans and passes for unrelated arrays.
        np.testing.assert_allclose(self.trainer.theta_, expected_theta,
                                   rtol=1e-6)
        np.testing.assert_allclose(self.trainer.sigma_, expected_sigma,
                                   rtol=1e-6)

    @unittest.skip("No ranking implemented yet")
    def test_rank(self):
        """Rank one unread and one read fixture article (currently skipped)."""
        self.trainer.train()

        unread_doc = Article.objects(headline=u"Sony = Bad").first()
        read_doc = Article.objects(headline=u"Apple").first()

        rank_unread_doc = self.trainer.rank(unread_doc)
        rank_read_doc = self.trainer.rank(read_doc)

        # NOTE(review): the expected values come from UserModelBayes while
        # the trainer is a UserModelSVM -- confirm the constants are shared
        # before enabling this test.
        self.assertEqual(rank_unread_doc, UserModelBayes.UNREAD)
        self.assertEqual(rank_read_doc, UserModelBayes.READ)