def test_linear_condense_does_nothing_on_fewer_than_three_points(self):
  """Checks that inputs of zero, one, or two points come back unchanged."""
  small_inputs = (
      (np.array([]), np.array([]), np.array([])),
      (np.array([1.]), np.array([2.]), np.array([.5])),
      (np.array([1., 2.]), np.array([2., 3.]), np.array([.5, 4.])),
  )
  for x, y, w in small_inputs:
    # The expected output is the input triple itself.
    self.assert_allclose([x, y, w], linear_condense.linear_condense(x, y, w))
def test_linear_condense_invariant_to_fusing_points(self):
  """Checks that fusing the sorted points first does not change the result."""
  np.random.seed(954348)
  num_points = 1000
  # Only 10 distinct integer x values are drawn, so many points share an x.
  x = np.sort(np.random.randint(10, size=num_points).astype(float))
  y = x + np.random.normal(size=num_points)
  w = np.random.uniform(size=num_points)

  direct_x, direct_y, direct_w = linear_condense.linear_condense(x, y, w)

  fused_x, fused_y, fused_w = utils.fuse_sorted_points(x, y, w)
  via_fused_x, via_fused_y, via_fused_w = linear_condense.linear_condense(
      fused_x, fused_y, fused_w)

  comparisons = (
      (direct_x, via_fused_x),
      (direct_y, via_fused_y),
      (direct_w, via_fused_w),
  )
  for expected, actual in comparisons:
    self.assert_allclose(expected, actual)
def test_linear_condense_returns_centroid_when_only_one_unique_x(self):
  """Checks that points sharing a single x condense to one centroid point."""
  x = np.array([1., 1., 1.])
  y = np.array([2., 3., 8.])
  w = np.array([1., 2., 1.])

  # Weighted mean of y is (1*2 + 2*3 + 1*8) / 4 = 4, and the weights sum to 4.
  expected_centroid = ([1.], [4.], [4.])
  condensed = linear_condense.linear_condense(x, y, w)
  self.assert_allclose(expected_centroid, condensed)
def test_linear_condense_preserves_mse_diff_between_any_two_lines(self):
  """Checks that the MSE gap between two lines survives condensing."""
  np.random.seed(954349)
  num_points = 100
  x = np.random.normal(loc=3.0, scale=2.0, size=num_points)
  y = x + np.random.normal(loc=-1.0, scale=4.3, size=num_points)
  w = np.random.uniform(size=num_points)

  small_x, small_y, small_w = linear_condense.linear_condense(x, y, w)

  # Draw two random lines as (slope, intercept) pairs.
  slope_a, intercept_a = np.random.normal(size=2)
  slope_b, intercept_b = np.random.normal(size=2)

  # Condensing squashes noise, so a line's MSE on the condensed points does
  # not match its MSE on the original points. The offset between the two is
  # expected to be approximately the same for every line, so it cancels when
  # the MSEs of two lines are subtracted:
  #   true_mse(a) - true_mse(b) ~= condensed_mse(a) - condensed_mse(b).
  original_gap = (
      _line_mse_on_data(slope_a, intercept_a, x, y, w) -
      _line_mse_on_data(slope_b, intercept_b, x, y, w))
  condensed_gap = (
      _line_mse_on_data(slope_a, intercept_a, small_x, small_y, small_w) -
      _line_mse_on_data(slope_b, intercept_b, small_x, small_y, small_w))

  self.assertAlmostEqual(original_gap, condensed_gap)
def test_linear_condense_returns_positive_weights(self):
  """Checks that both condensed weights are strictly positive."""
  np.random.seed(954351)
  num_points = 100
  x = np.random.uniform(low=0.0, high=1.0, size=num_points)
  y = x**2 + np.random.normal(scale=1.3, size=num_points)
  w = np.random.uniform(size=num_points)

  _, _, condensed_w = linear_condense.linear_condense(x, y, w)
  # Unpacking also asserts that exactly two weights were returned.
  first_weight, second_weight = condensed_w
  for weight in (first_weight, second_weight):
    self.assertGreater(weight, 0)
def test_linear_condense_returns_xs_within_the_original_domain(self):
  """Checks that condensed xs lie inside [min(x), max(x)] of the input."""
  np.random.seed(954350)
  num_points = 100
  x = np.random.uniform(low=0.0, high=1.0, size=num_points)
  y = x**2 + np.random.normal(scale=1.3, size=num_points)
  w = np.random.uniform(size=num_points)

  xs_after, _, _ = linear_condense.linear_condense(x, y, w)

  domain_low = x.min()
  domain_high = x.max()
  self.assertLessEqual(domain_low, min(xs_after))
  self.assertGreaterEqual(domain_high, max(xs_after))
def test_linear_condense_preserves_translation(self):
  """Checks that condensing commutes with translating x and y."""
  np.random.seed(954348)
  num_points = 100
  x = np.random.normal(loc=3.0, scale=2.0, size=num_points)
  y = x + np.random.normal(loc=-1.0, scale=4.3, size=num_points)
  w = np.random.uniform(size=num_points)

  # Property under test: translate(condense(points)) == condense(translate(points)).
  # Only x and y are shifted; translations in w are NOT preserved, so the
  # weights are passed through untouched.
  x_shift, y_shift = np.random.normal(size=2)

  base_x, base_y, base_w = linear_condense.linear_condense(x, y, w)

  shifted_x = x + x_shift
  shifted_y = y + y_shift
  moved_x, moved_y, moved_w = linear_condense.linear_condense(
      shifted_x, shifted_y, w)

  self.assert_allclose(base_x + x_shift, moved_x)
  self.assert_allclose(base_y + y_shift, moved_y)
  self.assert_allclose(base_w, moved_w)
def test_linear_condense_preserves_best_fit_line(self):
  """Checks that the best-fit line is the same before and after condensing."""
  np.random.seed(954358)
  num_points = 100
  x = np.random.normal(loc=3.0, scale=2.0, size=num_points)
  y = x + np.random.normal(loc=-1.0, scale=4.3, size=num_points)
  w = np.random.uniform(size=num_points)

  small_x, small_y, small_w = linear_condense.linear_condense(x, y, w)

  # Fit a line to the condensed points, then to the full data set.
  slope_after, intercept_after = _best_fit_line(small_x, small_y, small_w)
  slope_before, intercept_before = _best_fit_line(x, y, w)

  self.assertAlmostEqual(slope_before, slope_after)
  self.assertAlmostEqual(intercept_before, intercept_after)
def test_linear_condense_preserves_scaling(self):
  """Checks that condensing commutes with scaling x, y, and w."""
  np.random.seed(954348)
  num_points = 100
  x = np.random.normal(loc=3.0, scale=2.0, size=num_points)
  y = x + np.random.normal(loc=-1.0, scale=4.3, size=num_points)
  w = np.random.uniform(size=num_points)

  # Property under test: scale(condense(points)) ~= condense(scale(points)).
  # All three of x, y, and w may be scaled, provided w stays positive, which
  # is why the weight factor goes through abs().
  x_factor = np.random.normal()
  y_factor = np.random.normal()
  w_factor = abs(np.random.normal())

  base_x, base_y, base_w = linear_condense.linear_condense(x, y, w)

  scaled_x = x * x_factor
  scaled_y = y * y_factor
  scaled_w = w * w_factor
  result_x, result_y, result_w = linear_condense.linear_condense(
      scaled_x, scaled_y, scaled_w)

  self.assert_allclose(np.array(base_x) * x_factor, result_x)
  self.assert_allclose(np.array(base_y) * y_factor, result_y)
  self.assert_allclose(np.array(base_w) * w_factor, result_w)
def test_linear_condense_preserves_centroid(self):
  """Checks that the weighted centroid and total weight are unchanged."""
  np.random.seed(954348)
  num_points = 100
  x = np.random.normal(loc=3.0, scale=2.0, size=num_points)
  y = x + np.random.normal(loc=-1.0, scale=4.3, size=num_points)
  w = np.random.uniform(size=num_points)

  # Weighted centroid of the original points: (mean_x, mean_y, total_w).
  total_w = w.sum()
  mean_x = np.dot(x, w) / total_w
  mean_y = np.dot(y, w) / total_w

  # The same quantities computed on the condensed points.
  small_x, small_y, small_w = linear_condense.linear_condense(x, y, w)
  small_total_w = sum(small_w)
  small_mean_x = np.dot(small_x, small_w) / small_total_w
  small_mean_y = np.dot(small_y, small_w) / small_total_w

  self.assert_allclose(mean_x, small_mean_x)
  self.assert_allclose(mean_y, small_mean_y)
  self.assert_allclose(total_w, small_total_w)