class TestLogisticRegressionL2Norm(unittest.TestCase):
    """Tests for LogisticRegressionL2Norm class.

    Uses Amazon data to test logistic regression.

    Statics:
        _multiprocess_can_split_ (bool): Flag for nose tests to run tests in parallel.

    """

    _multiprocess_can_split_ = True

    def setUp(self):
        """Constructor for TestLogisticRegression.

        Loads Amazon data, and creates training and testing data.

        """
        # Create an instance of the Convert Numpy class
        self.convert_numpy = ConvertNumpy()

        # Create an instance of log likelihood
        # (attribute name fixed from the misspelled `log_likelhood`)
        self.log_likelihood = LogLikelihood()

        # Create an instance of the accuracy class
        self.accuracy = Accuracy()

        # Load the important words
        self.important_words = json.load(open('./unit_tests/test_data/classification/amazon/important_words.json',
                                              'r'))

        # Create an instance of the Logistic Regression with L2 Norm class
        self.logistic_regression_l2_norm = LogisticRegressionL2Norm()

        # Load the amazon baby train subset
        self.training_data = pd.read_csv('./unit_tests/test_data/classification/amazon/amazon_baby_subset_train.csv')

        # Load the amazon baby validation subset
        self.validation_data = pd.read_csv('./unit_tests/test_data/'
                                           'classification/amazon/amazon_baby_subset_validation.csv')

    def test_01_gradient_ascent_no_penalty(self):
        """Tests gradient ascent algorithm.

        Tests the gradient ascent algorithm but with no l2 penalty.

        """
        # We will use important words for the output
        features = self.important_words

        # Output will use sentiment
        output = ['sentiment']

        # Convert our pandas frame to numpy
        feature_matrix_train, label_train = self.convert_numpy.convert_to_numpy(self.training_data, features,
                                                                                output, 1)
        feature_matrix_valid, label_valid = self.convert_numpy.convert_to_numpy(self.validation_data, features,
                                                                                output, 1)

        # Compute the coefficients (l2_penalty of 0 means no regularization)
        coefficients = self.logistic_regression_l2_norm.gradient_ascent(feature_matrix_train, label_train,
                                                                        {"initial_coefficients": np.zeros(194),
                                                                         "step_size": 5e-6, "l2_penalty": 0,
                                                                         "max_iter": 501})

        # Get the accuracy
        train_accuracy = self.accuracy.logistic_regression(feature_matrix_train, label_train, coefficients)
        validation_accuracy = self.accuracy.logistic_regression(feature_matrix_valid, label_valid, coefficients)

        # Make sure the accuracies are correct
        self.assertEqual(round(0.785156157787, 5), round(train_accuracy, 5))
        self.assertEqual(round(0.78143964149, 5), round(validation_accuracy, 5))

    def test_02_gradient_ascent_10_penalty(self):
        """Test gradient ascent algorithm.

        Tests the gradient ascent algorithm with penalty.

        """
        # We will use important words for the output
        features = self.important_words

        # Output will use sentiment
        output = ['sentiment']

        # Convert our pandas frame to numpy
        feature_matrix_train, label_train = self.convert_numpy.convert_to_numpy(self.training_data, features,
                                                                                output, 1)
        feature_matrix_valid, label_valid = self.convert_numpy.convert_to_numpy(self.validation_data, features,
                                                                                output, 1)

        # Compute the coefficients with an l2 penalty of 10
        coefficients = self.logistic_regression_l2_norm.gradient_ascent(feature_matrix_train, label_train,
                                                                        {"initial_coefficients": np.zeros(194),
                                                                         "step_size": 5e-6, "l2_penalty": 10,
                                                                         "max_iter": 501})

        # Get the accuracy
        train_accuracy = self.accuracy.logistic_regression(feature_matrix_train, label_train, coefficients)
        validation_accuracy = self.accuracy.logistic_regression(feature_matrix_valid, label_valid, coefficients)

        # Make sure the accuracies are correct
        self.assertEqual(round(0.784990911452, 5), round(train_accuracy, 5))
        self.assertEqual(round(0.781719727383, 5), round(validation_accuracy, 5))

    def test_03_log_likelihood(self):
        """Tests log likelihood with l2 norm.

        Tests the log likelihood with l2 norm and compare it with known values.

        """
        # Generate test feature, coefficients, and label
        feature_matrix = np.array([[1., 2., 3.], [1., -1., -1]])
        coefficients = np.array([1., 3., -1.])
        label = np.array([-1, 1])

        # Compute the log likelihood with an l2 penalty of 10
        lg = self.log_likelihood.log_likelihood_l2_norm(feature_matrix, label, coefficients, 10)

        # Assert the value
        self.assertEqual(round(lg, 5), round(-105.33141000000001, 5))