Exemple #1
0
 def test_rescale(self):
     """munge -- rescale

     Checks the first row returned by ``munge.rescale`` for a 3x9 matrix.

     For the first four columns the expected numbers equal
     ``(value - column mean) / sample standard deviation``.  The last
     five columns hold the same value in every row and are expected to
     come back unchanged.
     """
     datamat = [
         [4, 8, 3, 4, 5, 6, 7, 8, 9],
         [1, 9, 11, 4, 5, 6, 7, 8, 9],
         [3, 2, 3, 99, 5, 6, 7, 8, 9],
     ]
     result = munge.rescale(datamat)
     expected = [
         0.8728715609439696, 0.440225453162812, -0.5773502691896257,
         -0.5773502691896257, 5, 6, 7, 8, 9
     ]
     first_row = result[0]

     # Check the width explicitly: zip() below stops at the shorter
     # sequence and would otherwise hide a missing or extra column.
     self.assertEqual(len(expected), len(first_row))

     # Compare with a tolerance instead of bit-for-bit: the expected
     # values are computed floats, and a harmless change in summation
     # order inside rescale() could flip the last digit.
     for column, (want, got) in enumerate(zip(expected, first_row)):
         self.assertAlmostEqual(
             want, got,
             msg="column {0}: expected {1!r}, got {2!r}".format(
                 column, want, got))
    def test_gradient(self):
        """logistic_regression -- gradient

        Fits coefficients with ``gradient.maximize_stochastic`` on a
        training split of ``self.data`` and then checks the precision and
        recall obtained on the held-out split, where a prediction of at
        least 0.5 counts as positive and a label of 1 is a positive.
        """
        # x is [1, experience, salary]
        x = [[1] + row[:2] for row in self.data]

        # y is "paid account"
        y = [row[2] for row in self.data]

        rescaled = munge.rescale(x)

        # Fixed seed so the expected metrics below are reproducible
        # (presumably the split and the optimiser both draw from the
        # global ``random`` module -- confirm against their sources).
        random.seed(0)
        x_train, x_test, y_train, y_test = ml.train_test_split(rescaled,
                                                               y,
                                                               0.33)

        beta_0 = [1, 1, 1]
        beta_hat = gradient.\
            maximize_stochastic(logistic_regression.logistic_log_likelihood_i,
                                logistic_regression.logistic_log_gradient_i,
                                x_train,
                                y_train,
                                beta_0)

        true_positives = false_positives = true_negatives = false_negatives = 0

        # Build the confusion matrix on the held-out rows.
        for x_i, y_i in zip(x_test, y_test):
            predict = logistic_regression.logistic(algebra.dot(beta_hat, x_i))

            if y_i == 1 and predict >= 0.5:
                true_positives += 1
            elif y_i == 1:
                false_negatives += 1
            elif predict >= 0.5:
                false_positives += 1
            else:
                true_negatives += 1

        message = "true_pos={0}; false_neg={1}, false_pos={2}; true_neg={3}"
        logging.debug(message.format(true_positives,
                                     false_negatives,
                                     false_positives,
                                     true_negatives))

        predicted_positives = true_positives + false_positives
        actual_positives = true_positives + false_negatives

        # Without these guards a degenerate model or split aborts the test
        # with a ZeroDivisionError instead of a readable failure.
        self.assertGreater(predicted_positives, 0,
                           "no test row was predicted positive")
        self.assertGreater(actual_positives, 0,
                           "no test row carries a positive label")

        precision = true_positives / float(predicted_positives)
        recall = true_positives / float(actual_positives)

        self.assertEqual(0.93, round(precision, 2))
        self.assertEqual(0.82, round(recall, 2))
Exemple #3
0
    def test_gradient(self):
        """logistic_regression -- gradient

        Trains on one part of ``self.data`` using
        ``gradient.maximize_stochastic`` and verifies precision and recall
        on the remaining rows.  A row is predicted positive when the
        logistic output is at least 0.5; a label of 1 marks a positive.
        """
        # x is [1, experience, salary]
        x = [[1] + row[:2] for row in self.data]

        # y is "paid account"
        y = [row[2] for row in self.data]

        rescaled = munge.rescale(x)

        # Seeded so the asserted metrics stay stable between runs
        # (assumes the helpers below use the global ``random`` state).
        random.seed(0)
        x_train, x_test, y_train, y_test = ml.train_test_split(
            rescaled, y, 0.33)

        beta_0 = [1, 1, 1]
        beta_hat = gradient.maximize_stochastic(
            logistic_regression.logistic_log_likelihood_i,
            logistic_regression.logistic_log_gradient_i,
            x_train,
            y_train,
            beta_0)

        true_positives = false_positives = true_negatives = false_negatives = 0

        # Tally the confusion matrix over the held-out rows.
        for x_i, y_i in zip(x_test, y_test):
            predict = logistic_regression.logistic(algebra.dot(beta_hat, x_i))
            predicted_positive = predict >= 0.5

            if y_i == 1:
                if predicted_positive:
                    true_positives += 1
                else:
                    false_negatives += 1
            elif predicted_positive:
                false_positives += 1
            else:
                true_negatives += 1

        message = "true_pos={0}; false_neg={1}, false_pos={2}; true_neg={3}"
        logging.debug(
            message.format(true_positives, false_negatives, false_positives,
                           true_negatives))

        # Fail with an explanation rather than crash with ZeroDivisionError
        # when either ratio below would have an empty denominator.
        precision_base = true_positives + false_positives
        recall_base = true_positives + false_negatives
        if not precision_base:
            self.fail("precision undefined: nothing was predicted positive")
        if not recall_base:
            self.fail("recall undefined: test split has no positive labels")

        precision = true_positives / float(precision_base)
        recall = true_positives / float(recall_base)

        self.assertEqual(0.93, round(precision, 2))
        self.assertEqual(0.82, round(recall, 2))
Exemple #4
0
 def test_rescale(self):
     """munge -- rescale

     Feeds a 3x9 matrix to ``munge.rescale`` and verifies its first
     output row.  The first four expected entries are the inputs minus
     their column mean, divided by the column's sample standard
     deviation; the remaining five columns are constant across rows and
     are expected to pass through unchanged.
     """
     datamat = [
         [4, 8, 3, 4, 5, 6, 7, 8, 9],
         [1, 9, 11, 4, 5, 6, 7, 8, 9],
         [3, 2, 3, 99, 5, 6, 7, 8, 9]
     ]
     result = munge.rescale(datamat)
     expected = [0.8728715609439696,
                 0.440225453162812,
                 -0.5773502691896257,
                 -0.5773502691896257,
                 5,
                 6,
                 7,
                 8,
                 9]
     actual = result[0]

     # A length mismatch must fail loudly; pairing the rows with zip()
     # alone would truncate to the shorter one.
     self.assertEqual(len(expected), len(actual))

     # Exact equality on computed floats is brittle, so each entry is
     # compared to 7 decimal places (assertAlmostEqual's default).
     for position, pair in enumerate(zip(expected, actual)):
         wanted, found = pair
         self.assertAlmostEqual(
             wanted, found,
             msg="entry {0} of the first row differs: {1!r} != {2!r}".format(
                 position, wanted, found))