def test_linear_condense_does_nothing_on_fewer_than_three_points(self):
        """Inputs of zero, one, or two points must come back unchanged."""
        small_inputs = (
            # No points at all.
            (np.array([]), np.array([]), np.array([])),
            # A single point.
            (np.array([1.]), np.array([2.]), np.array([.5])),
            # Exactly two points.
            (np.array([1., 2.]), np.array([2., 3.]), np.array([.5, 4.])),
        )
        for x, y, w in small_inputs:
            condensed = linear_condense.linear_condense(x, y, w)
            self.assert_allclose([x, y, w], condensed)
    def test_linear_condense_invariant_to_fusing_points(self):
        """Condensing raw points must match condensing their fused form."""
        np.random.seed(954348)
        num_points = 1000
        # Only ten distinct integer x values are possible, so with 1000 draws
        # many points share the same x.
        x = np.sort(np.random.randint(10, size=num_points).astype(float))
        y = x + np.random.normal(size=num_points)
        w = np.random.uniform(size=num_points)

        raw_x, raw_y, raw_w = linear_condense.linear_condense(x, y, w)

        # Fuse first, then condense the fused points.
        fused_x, fused_y, fused_w = utils.fuse_sorted_points(x, y, w)
        via_fused_x, via_fused_y, via_fused_w = linear_condense.linear_condense(
            fused_x, fused_y, fused_w)

        comparisons = (
            (raw_x, via_fused_x),
            (raw_y, via_fused_y),
            (raw_w, via_fused_w),
        )
        for expected, actual in comparisons:
            self.assert_allclose(expected, actual)
 def test_linear_condense_returns_centroid_when_only_one_unique_x(self):
     """Points sharing a single x must collapse to their weighted centroid."""
     x = np.array([1., 1., 1.])
     y = np.array([2., 3., 8.])
     w = np.array([1., 2., 1.])

     # Weighted mean of y is (2*1 + 3*2 + 8*1) / 4 = 4; total weight is 4.
     expected_centroid = ([1.], [4.], [4.])
     actual = linear_condense.linear_condense(x, y, w)
     self.assert_allclose(expected_centroid, actual)
    def test_linear_condense_preserves_mse_diff_between_any_two_lines(self):
        """The mse gap between two lines must survive condensing."""
        np.random.seed(954349)
        x = np.random.normal(loc=3.0, scale=2.0, size=100)
        y = x + np.random.normal(loc=-1.0, scale=4.3, size=100)
        w = np.random.uniform(size=100)
        small_x, small_y, small_w = linear_condense.linear_condense(x, y, w)

        # Draw two random lines as (slope, intercept) pairs.
        slope_a, intercept_a = np.random.normal(size=2)
        slope_b, intercept_b = np.random.normal(size=2)

        def mse_gap(px, py, pw):
            # mse(line a) - mse(line b), both measured on the given points.
            return (_line_mse_on_data(slope_a, intercept_a, px, py, pw) -
                    _line_mse_on_data(slope_b, intercept_b, px, py, pw))

        # Condensed points squash noise, so a single line's mse on the raw data
        # is not expected to equal its mse on the condensed data. The offset
        # true_mse(line) - condensed_mse(line) should, however, be roughly the
        # same for every line, which means the gap between any two lines'
        # mses should agree on both datasets.
        gap_on_data = mse_gap(x, y, w)
        gap_on_condensed = mse_gap(small_x, small_y, small_w)
        self.assertAlmostEqual(gap_on_data, gap_on_condensed)
 def test_linear_condense_returns_positive_weights(self):
     """Both condensed weights must be strictly positive."""
     np.random.seed(954351)
     x = np.random.uniform(low=0.0, high=1.0, size=100)
     y = x**2 + np.random.normal(scale=1.3, size=100)
     w = np.random.uniform(size=100)

     _, _, condensed_w = linear_condense.linear_condense(x, y, w)
     # Unpacking into two names also requires exactly two weights.
     first_weight, second_weight = condensed_w
     self.assertGreater(first_weight, 0)
     self.assertGreater(second_weight, 0)
 def test_linear_condense_returns_xs_within_the_original_domain(self):
     """Condensed xs must lie inside [min(x), max(x)] of the input."""
     np.random.seed(954350)
     x = np.random.uniform(low=0.0, high=1.0, size=100)
     y = x**2 + np.random.normal(scale=1.3, size=100)
     w = np.random.uniform(size=100)

     condensed_x, _, _ = linear_condense.linear_condense(x, y, w)

     lower_bound = x.min()
     smallest_condensed = min(condensed_x)
     self.assertLessEqual(lower_bound, smallest_condensed)

     upper_bound = x.max()
     largest_condensed = max(condensed_x)
     self.assertGreaterEqual(upper_bound, largest_condensed)
    def test_linear_condense_preserves_translation(self):
        """Translating in x and y must commute with condensing."""
        np.random.seed(954348)
        x = np.random.normal(loc=3.0, scale=2.0, size=100)
        y = x + np.random.normal(loc=-1.0, scale=4.3, size=100)
        w = np.random.uniform(size=100)

        # Property under test:
        #   translate(condense(points)) == condense(translate(points)).
        # Translations in x and y are preserved; translations in w are NOT,
        # so the weights are passed through unshifted.
        shift_x, shift_y = np.random.normal(size=2)
        base_x, base_y, base_w = linear_condense.linear_condense(x, y, w)
        moved_x, moved_y, moved_w = linear_condense.linear_condense(
            x + shift_x, y + shift_y, w)

        self.assert_allclose(base_x + shift_x, moved_x)
        self.assert_allclose(base_y + shift_y, moved_y)
        self.assert_allclose(base_w, moved_w)
    def test_linear_condense_preserves_best_fit_line(self):
        """The best-fit line of the condensed points must match the data's."""
        np.random.seed(954358)
        x = np.random.normal(loc=3.0, scale=2.0, size=100)
        y = x + np.random.normal(loc=-1.0, scale=4.3, size=100)
        w = np.random.uniform(size=100)

        small_x, small_y, small_w = linear_condense.linear_condense(x, y, w)
        small_slope, small_intercept = _best_fit_line(small_x, small_y, small_w)
        full_slope, full_intercept = _best_fit_line(x, y, w)

        # Compare slope first, then intercept.
        line_parameters = (
            (full_slope, small_slope),
            (full_intercept, small_intercept),
        )
        for expected, actual in line_parameters:
            self.assertAlmostEqual(expected, actual)
    def test_linear_condense_preserves_scaling(self):
        """Scaling x, y, and w must commute with condensing."""
        np.random.seed(954348)
        x = np.random.normal(loc=3.0, scale=2.0, size=100)
        y = x + np.random.normal(loc=-1.0, scale=4.3, size=100)
        w = np.random.uniform(size=100)

        # Property under test:
        #   scale(condense(points)) ~= condense(scale(points)).
        # It holds for x, y, and w alike as long as w stays positive, hence
        # the abs() on the weight factor.
        factor_x = np.random.normal()
        factor_y = np.random.normal()
        factor_w = abs(np.random.normal())

        base_x, base_y, base_w = linear_condense.linear_condense(x, y, w)
        scaled_x, scaled_y, scaled_w = linear_condense.linear_condense(
            x * factor_x, y * factor_y, w * factor_w)

        expected_x = np.array(base_x) * factor_x
        expected_y = np.array(base_y) * factor_y
        expected_w = np.array(base_w) * factor_w
        self.assert_allclose(expected_x, scaled_x)
        self.assert_allclose(expected_y, scaled_y)
        self.assert_allclose(expected_w, scaled_w)
    def test_linear_condense_preserves_centroid(self):
        """Condensing must keep the weighted centroid and the total weight."""
        np.random.seed(954348)
        x = np.random.normal(loc=3.0, scale=2.0, size=100)
        y = x + np.random.normal(loc=-1.0, scale=4.3, size=100)
        w = np.random.uniform(size=100)

        # Centroid of the original points: (mean_x, mean_y, total_weight).
        total_weight = w.sum()
        mean_x = np.dot(x, w) / total_weight
        mean_y = np.dot(y, w) / total_weight

        # Same quantities computed from the condensed points.
        small_x, small_y, small_w = linear_condense.linear_condense(x, y, w)
        small_total_weight = sum(small_w)
        small_mean_x = np.dot(small_x, small_w) / small_total_weight
        small_mean_y = np.dot(small_y, small_w) / small_total_weight

        centroid_pairs = (
            (mean_x, small_mean_x),
            (mean_y, small_mean_y),
            (total_weight, small_total_weight),
        )
        for expected, actual in centroid_pairs:
            self.assert_allclose(expected, actual)