def test_rescale(self):
    """munge -- rescale

    Rescale a small 3x9 matrix and check the first row of the result.

    The last five columns hold the same value in every input row, and the
    expected row keeps them as the original integers 5..9.
    """
    datamat = [
        [4, 8, 3, 4, 5, 6, 7, 8, 9],
        [1, 9, 11, 4, 5, 6, 7, 8, 9],
        [3, 2, 3, 99, 5, 6, 7, 8, 9],
    ]
    expected = [
        0.8728715609439696,
        0.440225453162812,
        -0.5773502691896257,
        -0.5773502691896257,
        5, 6, 7, 8, 9,
    ]

    first_row = munge.rescale(datamat)[0]

    # zip() below stops at the shorter sequence, so pin the length first.
    self.assertEqual(len(expected), len(first_row))

    # The leading values are computed floats; exact equality would make the
    # test depend on the last bit of rounding, so compare with a tight
    # tolerance instead.
    for column, (want, got) in enumerate(zip(expected, first_row)):
        self.assertAlmostEqual(want, got, places=12,
                               msg="column {0}".format(column))
def test_gradient(self):
    """logistic_regression -- gradient

    Fit a logistic model with stochastic gradient ascent on a seeded
    train/test split of ``self.data`` and pin the precision and recall
    measured on the test part.
    """
    # Each feature row is [1, experience, salary]; the leading 1 is the
    # constant term.
    features = [[1] + record[:2] for record in self.data]
    # The label is the "paid account" flag.
    labels = [record[2] for record in self.data]

    scaled_features = munge.rescale(features)

    # Seed right before the split so the pinned figures are reproducible.
    random.seed(0)
    x_train, x_test, y_train, y_test = ml.train_test_split(
        scaled_features, labels, 0.33)

    start_beta = [1, 1, 1]
    beta_hat = gradient.maximize_stochastic(
        logistic_regression.logistic_log_likelihood_i,
        logistic_regression.logistic_log_gradient_i,
        x_train,
        y_train,
        start_beta)

    # Confusion-matrix tallies over the held-out rows.
    hits = 0            # actual 1, predicted >= 0.5
    misses = 0          # actual 1, predicted < 0.5
    false_alarms = 0    # actual not 1, predicted >= 0.5
    rejections = 0      # actual not 1, predicted < 0.5
    for row, actual in zip(x_test, y_test):
        probability = logistic_regression.logistic(
            algebra.dot(beta_hat, row))
        predicted_positive = probability >= 0.5
        if actual == 1:
            if predicted_positive:
                hits += 1
            else:
                misses += 1
        elif predicted_positive:
            false_alarms += 1
        else:
            rejections += 1

    template = "true_pos={0}; false_neg={1}, false_pos={2}; true_neg={3}"
    summary = template.format(hits, misses, false_alarms, rejections)
    logging.debug(summary)

    # float() keeps the ratios fractional even under integer division.
    predicted_total = float(hits + false_alarms)
    actual_total = float(hits + misses)
    precision = hits / predicted_total
    recall = hits / actual_total

    self.assertEqual(0.93, round(precision, 2))
    self.assertEqual(0.82, round(recall, 2))
def test_gradient(self):
    """logistic_regression -- gradient

    Fit a logistic model with stochastic gradient ascent on a seeded
    train/test split of ``self.data`` and pin the precision and recall
    measured on the test part.
    """
    # Each feature row is [1, experience, salary]; the leading 1 is the
    # constant term.
    features = [[1] + record[:2] for record in self.data]
    # The label is the "paid account" flag.
    labels = [record[2] for record in self.data]

    scaled_features = munge.rescale(features)

    # Seed right before the split so the pinned figures are reproducible.
    random.seed(0)
    x_train, x_test, y_train, y_test = ml.train_test_split(
        scaled_features, labels, 0.33)

    start_beta = [1, 1, 1]
    beta_hat = gradient.maximize_stochastic(
        logistic_regression.logistic_log_likelihood_i,
        logistic_regression.logistic_log_gradient_i,
        x_train,
        y_train,
        start_beta)

    # Confusion-matrix tallies over the held-out rows.
    hits = 0            # actual 1, predicted >= 0.5
    misses = 0          # actual 1, predicted < 0.5
    false_alarms = 0    # actual not 1, predicted >= 0.5
    rejections = 0      # actual not 1, predicted < 0.5
    for row, actual in zip(x_test, y_test):
        probability = logistic_regression.logistic(
            algebra.dot(beta_hat, row))
        predicted_positive = probability >= 0.5
        if actual == 1:
            if predicted_positive:
                hits += 1
            else:
                misses += 1
        elif predicted_positive:
            false_alarms += 1
        else:
            rejections += 1

    template = "true_pos={0}; false_neg={1}, false_pos={2}; true_neg={3}"
    summary = template.format(hits, misses, false_alarms, rejections)
    logging.debug(summary)

    # float() keeps the ratios fractional even under integer division.
    predicted_total = float(hits + false_alarms)
    actual_total = float(hits + misses)
    precision = hits / predicted_total
    recall = hits / actual_total

    self.assertEqual(0.93, round(precision, 2))
    self.assertEqual(0.82, round(recall, 2))
def test_rescale(self):
    """munge -- rescale

    Rescale a small 3x9 matrix and check the first row of the result.

    The last five columns hold the same value in every input row, and the
    expected row keeps them as the original integers 5..9.
    """
    datamat = [
        [4, 8, 3, 4, 5, 6, 7, 8, 9],
        [1, 9, 11, 4, 5, 6, 7, 8, 9],
        [3, 2, 3, 99, 5, 6, 7, 8, 9],
    ]
    expected = [
        0.8728715609439696,
        0.440225453162812,
        -0.5773502691896257,
        -0.5773502691896257,
        5, 6, 7, 8, 9,
    ]

    first_row = munge.rescale(datamat)[0]

    # zip() below stops at the shorter sequence, so pin the length first.
    self.assertEqual(len(expected), len(first_row))

    # The leading values are computed floats; exact equality would make the
    # test depend on the last bit of rounding, so compare with a tight
    # tolerance instead.
    for column, (want, got) in enumerate(zip(expected, first_row)):
        self.assertAlmostEqual(want, got, places=12,
                               msg="column {0}".format(column))