def test_input_data_size(): # Regression test for #6288 # Previoulsly, a metric requiring a particular input dimension would fail def custom_metric(x, y): assert x.shape[0] == 3 return np.sum((x - y)**2) rng = check_random_state(0) X = rng.rand(10, 3) pyfunc = DistanceMetric.get_metric("pyfunc", func=dist_func, p=2) eucl = DistanceMetric.get_metric("euclidean") assert_array_almost_equal(pyfunc.pairwise(X), eucl.pairwise(X))
def check_pdist_bool(metric, D_true): dm = DistanceMetric.get_metric(metric) D12 = dm.pairwise(X1_bool) # Based on https://github.com/scipy/scipy/pull/7373 # When comparing two all-zero vectors, scipy>=1.2.0 jaccard metric # was changed to return 0, instead of nan. if metric == 'jaccard' and LooseVersion(scipy_version) < '1.2.0': D_true[np.isnan(D_true)] = 0 assert_array_almost_equal(D12, D_true)
def test_pyfunc_metric(): X = np.random.random((10, 3)) euclidean = DistanceMetric.get_metric("euclidean") pyfunc = DistanceMetric.get_metric("pyfunc", func=dist_func, p=2) # Check if both callable metric and predefined metric initialized # DistanceMetric object is picklable euclidean_pkl = pickle.loads(pickle.dumps(euclidean)) pyfunc_pkl = pickle.loads(pickle.dumps(pyfunc)) D1 = euclidean.pairwise(X) D2 = pyfunc.pairwise(X) D1_pkl = euclidean_pkl.pairwise(X) D2_pkl = pyfunc_pkl.pairwise(X) assert_array_almost_equal(D1, D2) assert_array_almost_equal(D1_pkl, D2_pkl)
def test_kd_tree_two_point(dualtree): n_samples, n_features = (100, 3) rng = check_random_state(0) X = rng.random_sample((n_samples, n_features)) Y = rng.random_sample((n_samples, n_features)) r = np.linspace(0, 1, 10) kdt = KDTree(X, leaf_size=10) D = DistanceMetric.get_metric("euclidean").pairwise(Y, X) counts_true = [(D <= ri).sum() for ri in r] counts = kdt.two_point_correlation(Y, r=r, dualtree=dualtree) assert_array_almost_equal(counts, counts_true)
def test_ball_tree_two_point(n_samples=100, n_features=3): rng = check_random_state(0) X = rng.random_sample((n_samples, n_features)) Y = rng.random_sample((n_samples, n_features)) r = np.linspace(0, 1, 10) bt = BallTree(X, leaf_size=10) D = DistanceMetric.get_metric("euclidean").pairwise(Y, X) counts_true = [(D <= ri).sum() for ri in r] def check_two_point(r, dualtree): counts = bt.two_point_correlation(Y, r=r, dualtree=dualtree) assert_array_almost_equal(counts, counts_true) for dualtree in (True, False): check_two_point(r, dualtree)
def test_haversine_metric(): def haversine_slow(x1, x2): return 2 * np.arcsin( np.sqrt( np.sin(0.5 * (x1[0] - x2[0]))**2 + np.cos(x1[0]) * np.cos(x2[0]) * np.sin(0.5 * (x1[1] - x2[1]))**2)) X = np.random.random((10, 2)) haversine = DistanceMetric.get_metric("haversine") D1 = haversine.pairwise(X) D2 = np.zeros_like(D1) for i, x1 in enumerate(X): for j, x2 in enumerate(X): D2[i, j] = haversine_slow(x1, x2) assert_array_almost_equal(D1, D2) assert_array_almost_equal(haversine.dist_to_rdist(D1), np.sin(0.5 * D2)**2)
def brute_force_neighbors(X, Y, k, metric, **kwargs): D = DistanceMetric.get_metric(metric, **kwargs).pairwise(Y, X) ind = np.argsort(D, axis=1)[:, :k] dist = D[np.arange(Y.shape[0])[:, None], ind] return dist, ind
def check_pdist(metric, kwargs, D_true): dm = DistanceMetric.get_metric(metric, **kwargs) D12 = dm.pairwise(X1) assert_array_almost_equal(D12, D_true)
def check_cdist_bool(metric, D_true): dm = DistanceMetric.get_metric(metric) D12 = dm.pairwise(X1_bool, X2_bool) assert_array_almost_equal(D12, D_true)
def check_pickle(metric, kwargs): dm = DistanceMetric.get_metric(metric, **kwargs) D1 = dm.pairwise(X1) dm2 = pickle.loads(pickle.dumps(dm)) D2 = dm2.pairwise(X1) assert_array_almost_equal(D1, D2)
def test_pickle_bool_metrics(metric): dm = DistanceMetric.get_metric(metric) D1 = dm.pairwise(X1_bool) dm2 = pickle.loads(pickle.dumps(dm)) D2 = dm2.pairwise(X1_bool) assert_array_almost_equal(D1, D2)