コード例 #1
0
def test_input_data_size():
    """Check that a callable metric is handed full-dimensional vectors.

    Regression test for #6288: previously, a metric requiring a particular
    input dimension would fail.
    """
    def custom_metric(x, y):
        # Fails loudly if the metric is called with a vector that does not
        # have the 3 features of X.
        assert x.shape[0] == 3
        return np.sum((x - y) ** 2)

    rng = check_random_state(0)
    X = rng.rand(10, 3)

    # custom_metric returns the *squared* Euclidean distance, so the
    # reference distances are squared before comparing.
    pyfunc = DistanceMetric.get_metric("pyfunc", func=custom_metric)
    eucl = DistanceMetric.get_metric("euclidean")
    assert_array_almost_equal(pyfunc.pairwise(X), eucl.pairwise(X) ** 2)
コード例 #2
0
def test_pyfunc_metric():
    """A callable p-norm metric with p=2 must agree with "euclidean"."""
    def dist_func(x1, x2, p):
        diff = x1 - x2
        return np.sum(diff ** p) ** (1. / p)

    X = np.random.random((10, 3))

    builtin_metric = DistanceMetric.get_metric("euclidean")
    callable_metric = DistanceMetric.get_metric("pyfunc", func=dist_func, p=2)

    expected = builtin_metric.pairwise(X)
    observed = callable_metric.pairwise(X)
    assert_allclose(expected, observed)
コード例 #3
0
def check_pdist_bool(metric, D_true):
    """Compare pairwise distances of X1_bool under ``metric`` to ``D_true``.

    Note that ``D_true`` may be modified in place (NaN entries set to 0) for
    the 'jaccard' metric on scipy versions older than 1.2.0.
    """
    computed = DistanceMetric.get_metric(metric).pairwise(X1_bool)
    # Based on https://github.com/scipy/scipy/pull/7373
    # When comparing two all-zero vectors, scipy>=1.2.0 jaccard metric
    # was changed to return 0, instead of nan.
    old_scipy_jaccard = (metric == 'jaccard'
                         and LooseVersion(scipy_version) < '1.2.0')
    if old_scipy_jaccard:
        nan_mask = np.isnan(D_true)
        D_true[nan_mask] = 0
    assert_array_almost_equal(computed, D_true)
コード例 #4
0
def test_pyfunc_metric():
    """Built-in and callable metrics must agree, before and after pickling."""
    X = np.random.random((10, 3))

    euclidean = DistanceMetric.get_metric("euclidean")
    pyfunc = DistanceMetric.get_metric("pyfunc", func=dist_func, p=2)

    # Both a predefined metric and a callable-backed metric must survive a
    # pickle round trip.
    euclidean_pkl, pyfunc_pkl = [
        pickle.loads(pickle.dumps(metric)) for metric in (euclidean, pyfunc)
    ]

    direct = [metric.pairwise(X) for metric in (euclidean, pyfunc)]
    restored = [metric.pairwise(X) for metric in (euclidean_pkl, pyfunc_pkl)]

    assert_array_almost_equal(direct[0], direct[1])
    assert_array_almost_equal(restored[0], restored[1])
コード例 #5
0
def test_kd_tree_two_point(dualtree):
    """KDTree two-point correlation counts must match brute-force counts."""
    n_samples, n_features = 100, 3
    shape = (n_samples, n_features)
    rng = check_random_state(0)
    X = rng.random_sample(shape)
    Y = rng.random_sample(shape)
    radii = np.linspace(0, 1, 10)
    tree = KDTree(X, leaf_size=10)

    # Brute-force reference: number of (Y, X) pairs within each radius.
    pair_dist = DistanceMetric.get_metric("euclidean").pairwise(Y, X)
    expected = [(pair_dist <= radius).sum() for radius in radii]

    observed = tree.two_point_correlation(Y, r=radii, dualtree=dualtree)
    assert_array_almost_equal(observed, expected)
コード例 #6
0
ファイル: test_ball_tree.py プロジェクト: Afey/scikit-learn
def test_ball_tree_two_point(n_samples=100, n_features=3):
    """Yield two-point correlation checks for both dualtree settings.

    This is a generator-style test: each yielded tuple is
    ``(check_function, r, dualtree)``.
    """
    np.random.seed(0)
    shape = (n_samples, n_features)
    X = np.random.random(shape)
    Y = np.random.random(shape)
    radii = np.linspace(0, 1, 10)
    tree = BallTree(X, leaf_size=10)

    # Brute-force reference: number of (Y, X) pairs within each radius.
    brute = DistanceMetric.get_metric("euclidean").pairwise(Y, X)
    expected = [(brute <= radius).sum() for radius in radii]

    def check_two_point(r, dualtree):
        observed = tree.two_point_correlation(Y, r=r, dualtree=dualtree)
        assert_array_almost_equal(observed, expected)

    yield check_two_point, radii, True
    yield check_two_point, radii, False
コード例 #7
0
def test_haversine_metric():
    """The "haversine" metric must match a direct evaluation of the formula.

    Also checks that ``dist_to_rdist`` maps a distance D to sin(D / 2) ** 2.
    """
    def haversine_slow(x1, x2):
        sin_sq_0 = np.sin(0.5 * (x1[0] - x2[0])) ** 2
        sin_sq_1 = np.sin(0.5 * (x1[1] - x2[1])) ** 2
        inner = sin_sq_0 + np.cos(x1[0]) * np.cos(x2[0]) * sin_sq_1
        return 2 * np.arcsin(np.sqrt(inner))

    X = np.random.random((10, 2))

    haversine = DistanceMetric.get_metric("haversine")

    D1 = haversine.pairwise(X)
    # Fill the reference matrix entry by entry with the slow formula.
    D2 = np.zeros_like(D1)
    for row, first in enumerate(X):
        for col, second in enumerate(X):
            D2[row, col] = haversine_slow(first, second)

    assert_array_almost_equal(D1, D2)
    expected_rdist = np.sin(0.5 * D2) ** 2
    assert_array_almost_equal(haversine.dist_to_rdist(D1), expected_rdist)
コード例 #8
0
def check_pickle(metric, kwargs):
    """Pairwise distances on X1 must be unchanged by a pickle round trip."""
    original = DistanceMetric.get_metric(metric, **kwargs)
    before = original.pairwise(X1)
    restored = pickle.loads(pickle.dumps(original))
    after = restored.pairwise(X1)
    assert_array_almost_equal(before, after)
コード例 #9
0
def test_pickle_bool_metrics(metric):
    """Pairwise distances on X1_bool must survive a pickle round trip."""
    original = DistanceMetric.get_metric(metric)
    before = original.pairwise(X1_bool)
    restored = pickle.loads(pickle.dumps(original))
    after = restored.pairwise(X1_bool)
    assert_array_almost_equal(before, after)
コード例 #10
0
 def check_pdist(self, metric, kwargs, D_true):
     """Check pairwise distances of ``self.X1`` under ``metric``.

     Raises SkipTest for the 'canberra' metric when the scipy version
     comparison against '0.9' is non-positive.
     """
     if metric == 'canberra':
         if cmp_version(scipy.__version__, '0.9') <= 0:
             raise SkipTest("Canberra distance incorrect in scipy < 0.9")
     computed = DistanceMetric.get_metric(metric, **kwargs).pairwise(self.X1)
     assert_allclose(computed, D_true)
コード例 #11
0
 def check_cdist(self, metric, kwargs, D_true):
     """Check distances between ``self.X1`` and ``self.X2`` under ``metric``."""
     dist_metric = DistanceMetric.get_metric(metric, **kwargs)
     computed = dist_metric.pairwise(self.X1, self.X2)
     assert_array_almost_equal(computed, D_true)
コード例 #12
0
 def check_pickle_bool(self, metric):
     """Distances on ``self.X1_bool`` must survive a pickle round trip."""
     original = DistanceMetric.get_metric(metric)
     before = original.pairwise(self.X1_bool)
     restored = pickle.loads(pickle.dumps(original))
     after = restored.pairwise(self.X1_bool)
     assert_array_almost_equal(before, after)
コード例 #13
0
 def check_cdist(self, metric, kwargs, D_true):
     """Check distances between ``self.X1`` and ``self.X2`` under ``metric``.

     Raises SkipTest for the 'canberra' metric when the scipy version
     comparison against '0.9' is non-positive.
     """
     if metric == "canberra":
         if cmp_version(scipy.__version__, "0.9") <= 0:
             raise SkipTest("Canberra distance incorrect in scipy < 0.9")
     dist_metric = DistanceMetric.get_metric(metric, **kwargs)
     computed = dist_metric.pairwise(self.X1, self.X2)
     assert_array_almost_equal(computed, D_true)
def compute_distances():
    """Compute source latency results for a hard-coded constellation setup.

    Loads satellite, ground-station (GST) and source (SRC) positions through
    ``load_locations``, sorts sources and ground stations by their second
    position column, drops sources whose first position column exceeds 56,
    and then builds two results from the project helpers:

    * ``src_src_latency``: ``gsts_optimization`` output for the sources
      divided by ``LIGHT_IN_VACUUM``.
    * ``src_gst_latency``: output of ``compute_src_dst_latency`` using each
      source's nearest ground station and the GST-to-GST satellite result.

    Returns
    -------
    tuple
        ``(src_gst_latency, src_src_latency, src_pos)``, where ``src_pos`` is
        the sorted and filtered source-position array.
    """
    # Constellation parameters and input files (all hard-coded).
    altitude = 1150  # presumably km -- TODO confirm units
    min_elev = 40  # presumably degrees of minimum elevation -- TODO confirm
    orbits = 32
    sat_per_orbit = 50
    inclination = 53
    gst_file = "data/raw/ixp_geolocation.csv"
    src_file = "data/raw/WUP2018-F22-Cities_Over_300K_Annual.csv"

    # Load geo information
    sat_pos, gst_pos, src_pos = load_locations(altitude,
                                               orbits,
                                               sat_per_orbit,
                                               inclination,
                                               gst_file,
                                               src_file,
                                               time=15000)

    # Order sources by column 1 (longitude, judging by the variable name).
    lon_sort_idx_src = np.argsort(src_pos[:, 1])
    src_pos = (src_pos[lon_sort_idx_src])

    # Remove SRCs that are too high in latitude
    # NOTE(review): only values above +56 are removed; nothing is filtered
    # on the negative side -- confirm this is intended.
    higher = np.where(src_pos[:, 0] > 56)[0]
    src_pos = np.delete(src_pos, higher, axis=0)

    # Order ground stations the same way as the sources.
    lon_sort_idx_gst = np.argsort(gst_pos[:, 1])
    gst_pos = (gst_pos[lon_sort_idx_gst])

    # %%
    sat_sat_dist = compute_sat_sat_distance(sat_pos, altitude, orbits,
                                            sat_per_orbit)
    # Compute the BallTree for the satellites. This gives nn to satellites.
    # Positions are converted from degrees to radians for the haversine
    # metric.
    sat_tree = BallTree(np.deg2rad(sat_pos),
                        metric=DistanceMetric.get_metric("haversine"))

    # Get the satellites that are in reach for the ground stations
    #   and their distance.
    # Here the sources (cities) themselves are passed as the ground-side
    # positions.
    sat_gst_ind_city, sat_gst_dist_city = compute_gst_sat_distance(
        altitude, min_elev, src_pos, sat_tree)

    src_src_satellite = gsts_optimization(sat_gst_ind_city,
                                          sat_gst_dist_city,
                                          sat_sat_dist,
                                          n_gsts=src_pos.shape[0])

    # Convert the satellite path result to a latency by dividing by the
    # speed-of-light constant.
    src_src_latency = src_src_satellite / LIGHT_IN_VACUUM

    # %%
    # Repeat the reachability and optimization steps for the ground stations.
    sat_gst_ind_ixp, sat_gst_dist_ixp = compute_gst_sat_distance(
        altitude, min_elev, gst_pos, sat_tree)

    gst_gst_satellite = gsts_optimization(sat_gst_ind_ixp,
                                          sat_gst_dist_ixp,
                                          sat_sat_dist,
                                          n_gsts=gst_pos.shape[0])

    # Nearest ground station (index and distance) for every source.
    src_gst_ind, src_gst_dist = src_nearest_gst_distance(src_pos, gst_pos)

    n_src = src_pos.shape[0]
    # The empty lists fill arguments of compute_src_dst_latency that are not
    # used here; their meaning is defined by that helper.
    src_gst_latency = compute_src_dst_latency(n_src, [], src_gst_ind,
                                              src_gst_dist, [], [],
                                              gst_gst_satellite)

    return src_gst_latency, src_src_latency, src_pos
コード例 #15
0
 def check_pdist_bool(self, metric, D_true):
     """Check pairwise distances of ``self.X1_bool`` under ``metric``."""
     computed = DistanceMetric.get_metric(metric).pairwise(self.X1_bool)
     assert_allclose(computed, D_true)
コード例 #16
0
def check_pdist(metric, kwargs, D_true):
    """Compare pairwise distances of X1 under ``metric`` to ``D_true``."""
    dist_metric = DistanceMetric.get_metric(metric, **kwargs)
    computed = dist_metric.pairwise(X1)
    assert_array_almost_equal(computed, D_true)
def plot_absolute(src_gst_latency, src_src_latency, src_pos):
    """Plot average latency against great-circle distance and save a PNG.

    Draws the binned average of the city-to-city and the IXP-routed
    latencies, together with three reference curves derived from the
    distance itself, and writes "figures/latency-distance.png".

    Parameters
    ----------
    src_gst_latency, src_src_latency : array-like, square
        Pairwise latency matrices; only the strict upper triangle is used.
    src_pos : array-like
        Source positions in degrees, converted to radians for the haversine
        metric.
    """
    triu = np.triu_indices(src_gst_latency.shape[0], 1)
    ixp_routed = np.around(src_gst_latency[triu], 6)
    city_gst = np.around(src_src_latency[triu], 6)

    # NOTE(review): plotted latencies are multiplied by 1e3 while the axis
    # label says "(s)" -- confirm the intended unit.
    SCALING = 1e3

    plt.figure(figsize=(8, 6))
    # Call pairwise() on the metric instance; going through the
    # DistanceMetric class attribute fails where the base class does not
    # define pairwise itself.
    haversine = DistanceMetric.get_metric("haversine")
    pairwise_src = haversine.pairwise(np.deg2rad(src_pos),
                                      np.deg2rad(src_pos))
    pairwise_src = pairwise_src * EARTH_RADIUS

    # A fresh upper-triangle copy is passed to each statistics call, as the
    # helper is opaque here and might modify its input.
    vals, avg_c, _, _, _ = vector_map_statistics(
        pairwise_src[triu], city_gst, 10)
    plt.plot(vals,
             np.asarray(avg_c) * SCALING,
             label="Average city-city",
             linewidth=3)

    vals, avg_g, _, _, _ = vector_map_statistics(
        pairwise_src[triu], ixp_routed, 10)
    plt.plot(vals,
             np.asarray(avg_g) * SCALING,
             label="Average IXP-city",
             linewidth=3)

    # Reference curves computed directly from the binned distances.
    plt.plot(vals,
             vals / LIGHT_IN_FIBER * SCALING,
             ':',
             linewidth=3,
             label="Great-circle in fiber")
    plt.plot(vals,
             vals / LIGHT_IN_VACUUM * SCALING,
             '--',
             label="Great-circle in vacuum",
             linewidth=3)
    plt.plot(vals,
             vals * FIBER_PATH_STRETCH / LIGHT_IN_FIBER * SCALING,
             '-.',
             label="Path-stretch in fiber",
             linewidth=3)

    plt.ylim(0, 150)
    plt.xlim(0, np.max(vals))

    plt.xlabel("SRC-DST great-circle distance (km)")
    plt.ylabel("One-way latency (s)")
    plt.legend(loc=9, ncol=2, mode="expand")

    # Save figures
    plt.savefig("figures/latency-distance.png")
コード例 #18
0
def test_pickle_bool_metrics(metric):
    """A pickled-and-restored metric must give the same X1_bool distances."""
    fresh = DistanceMetric.get_metric(metric)
    expected = fresh.pairwise(X1_bool)
    round_tripped = pickle.loads(pickle.dumps(fresh))
    observed = round_tripped.pairwise(X1_bool)
    assert_array_almost_equal(expected, observed)
コード例 #19
0
 def check_cdist(self, metric, kwargs, D_true):
     """Compare ``self.X1``-to-``self.X2`` distances with ``D_true``."""
     built = DistanceMetric.get_metric(metric, **kwargs)
     cross_dist = built.pairwise(self.X1, self.X2)
     assert_array_almost_equal(cross_dist, D_true)
コード例 #20
0
 def check_pickle_bool(self, metric):
     """A restored metric must reproduce the ``self.X1_bool`` distances."""
     fresh = DistanceMetric.get_metric(metric)
     expected = fresh.pairwise(self.X1_bool)
     round_tripped = pickle.loads(pickle.dumps(fresh))
     observed = round_tripped.pairwise(self.X1_bool)
     assert_array_almost_equal(expected, observed)
コード例 #21
0
def brute_force_neighbors(X, Y, k, metric, **kwargs):
    """Find the k nearest rows of X for every row of Y by exhaustive search.

    Returns ``(dist, ind)``: the sorted distances and the matching row
    indices into X, one row per row of Y.
    """
    dist_metric = DistanceMetric.get_metric(metric, **kwargs)
    all_dist = dist_metric.pairwise(Y, X)
    nearest = np.argsort(all_dist, axis=1)[:, :k]
    # Column vector of row numbers so the lookup broadcasts over ``nearest``.
    row_ids = np.arange(Y.shape[0])[:, None]
    return all_dist[row_ids, nearest], nearest
コード例 #22
0

# def dist_func(a, b):
#     alpha = 1
#     return np.sqrt((a.x - b.x)**2 +
#                      (a.y - b.y)**2 +
#                      alpha*(a.theta - b.theta)**2)


def dist_func(a, b):
    """Return the weighted Euclidean distance between two 3-component points.

    The third component's squared difference is weighted by ``alpha`` (fixed
    at 1). Components are presumably (x, y, theta) -- TODO confirm.
    """
    alpha = 1
    d0 = a[0] - b[0]
    d1 = a[1] - b[1]
    d2 = a[2] - b[2]
    return np.sqrt(d0 ** 2 + d1 ** 2 + alpha * d2 ** 2)


# Demo script: build a KD-tree over the model states and look up the state
# nearest to the origin.
pyfunc = DistanceMetric.get_metric("pyfunc", func=dist_func)
model_states, X = make_states()
for i in range(X.shape[0]):
    print(X[i, :])
print("TREE TIME")
# NOTE: the tree uses the built-in "euclidean" metric; ``pyfunc`` above is
# not passed to it.
tree = KDTree(X, leaf_size=4, metric="euclidean")
pts = np.array([(0, 0, 0)])
dist, ind = tree.query(pts, k=1)
for i in ind:
    # ``i`` is a one-element index array (k=1); ndarray.item() replaces the
    # deprecated-and-removed np.asscalar.
    nearest = i.item()
    print(X[i])
    print(nearest)
    print(model_states[nearest])

# print(dist)
# print(KDTree.valid_metrics)
# a = np.empty((5, 5, 3))
コード例 #23
0
def check_pickle(metric, kwargs):
    """A pickled-and-restored metric must give the same distances on X1."""
    fresh = DistanceMetric.get_metric(metric, **kwargs)
    expected = fresh.pairwise(X1)
    round_tripped = pickle.loads(pickle.dumps(fresh))
    observed = round_tripped.pairwise(X1)
    assert_array_almost_equal(expected, observed)
def plot_relative(src_gst_latency, src_src_latency, src_pos):
    """Plot the relative latency change of IXP routing and save a PNG.

    The relative change ``(ixp_routed - city_gst) / city_gst * 100`` is
    binned over great-circle distance and drawn twice: on a log-scaled upper
    axis and a linear lower axis. The figure is written to
    "figures/percent-ixp-loss.png".

    Parameters
    ----------
    src_gst_latency, src_src_latency : array-like, square
        Pairwise latency matrices; only the strict upper triangle is used.
    src_pos : array-like
        Source positions in degrees, converted to radians for the haversine
        metric.
    """
    triu = np.triu_indices(src_gst_latency.shape[0], 1)
    ixp_routed = np.around(src_gst_latency[triu], 6)
    city_gst = np.around(src_src_latency[triu], 6)

    # Call pairwise() on the metric instance; going through the
    # DistanceMetric class attribute fails where the base class does not
    # define pairwise itself.
    haversine = DistanceMetric.get_metric("haversine")
    pairwise_src = haversine.pairwise(np.deg2rad(src_pos),
                                      np.deg2rad(src_pos))
    pairwise_src = pairwise_src * EARTH_RADIUS

    relative_change = (ixp_routed - city_gst) / city_gst * 100
    pairwise = pairwise_src[triu]

    # ``quartiles`` is indexed by the requested percentiles (25 and 75).
    vals, avg_c, min_c, max_c, quartiles = vector_map_statistics(
        pairwise, relative_change, 100, [25, 75])
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
    cur_color = colors[0]

    plt.figure(figsize=(8, 6))
    gs1 = gridspec.GridSpec(2, 1)
    gs1.update(wspace=0.01, hspace=0.11)  # set the spacing between axes.

    upper, lower = plt.subplot(gs1[0]), plt.subplot(gs1[1])

    # Upper axis: log-scaled view.
    upper.axhline(0, c='grey', linewidth=0.5)
    upper.semilogy(vals, avg_c, label="Average", c=cur_color, linewidth=3)
    upper.semilogy(vals,
                   quartiles[25],
                   "--",
                   label="Quartiles",
                   c=cur_color,
                   linewidth=3)
    upper.semilogy(vals, quartiles[75], "--", c=cur_color, linewidth=3)
    upper.plot(vals,
               max_c,
               ":",
               linewidth=3,
               label="Min-max variability",
               c=cur_color)
    upper.set_ylim(90, 1000)
    upper.set_ylabel("log-scale")
    upper.set_xticks([])
    upper.legend()

    # Lower axis: linear view, including the minimum curve.
    lower.axhline(0, c='grey', linewidth=0.5)
    lower.plot(vals,
               avg_c,
               label="Average city-city",
               c=cur_color,
               linewidth=3)
    lower.plot(vals,
               quartiles[25],
               "--",
               label="Quartiles",
               c=cur_color,
               linewidth=3)
    lower.plot(vals, quartiles[75], "--", c=cur_color, linewidth=3)
    lower.plot(vals,
               max_c,
               ":",
               linewidth=3,
               label="Min-max variability",
               c=cur_color)
    lower.plot(vals,
               min_c,
               ":",
               linewidth=3,
               label="Min-max variability",
               c=cur_color)
    lower.set_xlabel("SRC-DST great-circle distance (km)")
    lower.set_ylabel("Loss IXP deployment (%)")
    lower.set_ylim(-50, 90)

    plt.savefig("figures/percent-ixp-loss.png")
コード例 #25
0
def check_cdist_bool(metric, D_true):
    """Compare X1_bool-to-X2_bool distances under ``metric`` to ``D_true``."""
    dist_metric = DistanceMetric.get_metric(metric)
    computed = dist_metric.pairwise(X1_bool, X2_bool)
    assert_array_almost_equal(computed, D_true)
コード例 #26
0
def check_cdist_bool(metric, D_true):
    """Check the boolean cross-distance matrix produced by ``metric``."""
    cross_dist = DistanceMetric.get_metric(metric).pairwise(X1_bool, X2_bool)
    assert_array_almost_equal(cross_dist, D_true)
コード例 #27
0
def check_pdist(metric, kwargs, D_true):
    """Check the pairwise distance matrix of X1 produced by ``metric``."""
    computed = DistanceMetric.get_metric(metric, **kwargs).pairwise(X1)
    assert_array_almost_equal(computed, D_true)
コード例 #28
0
ファイル: test_dist_metrics.py プロジェクト: fess10/neighbors
 def check_pdist(self, metric, kwargs, D_true):
     """Check pairwise distances of ``self.X1`` under ``metric``.

     Raises SkipTest for the 'canberra' metric when the scipy version
     comparison against '0.9' is non-positive.
     """
     if metric == 'canberra':
         if cmp_version(scipy.__version__, '0.9') <= 0:
             raise SkipTest("Canberra distance incorrect in scipy < 0.9")
     computed = DistanceMetric.get_metric(metric, **kwargs).pairwise(self.X1)
     assert_array_almost_equal(computed, D_true)
コード例 #29
0
def brute_force_neighbors(X, Y, k, metric, **kwargs):
    """Exhaustively compute the k nearest neighbors in X of each row of Y.

    Returns ``(dist, ind)`` with the neighbor distances in ascending order
    and the corresponding row indices into X.
    """
    full_dist = DistanceMetric.get_metric(metric, **kwargs).pairwise(Y, X)
    order = np.argsort(full_dist, axis=1)
    ind = order[:, :k]
    # Column vector of query row numbers, broadcast against ``ind``.
    query_rows = np.arange(Y.shape[0])[:, None]
    dist = full_dist[query_rows, ind]
    return dist, ind
コード例 #30
0
    """ Retrieve the row of a condensed matrix from the x index of its corresponding square matrix. """
    row = np.empty(shape=(n, ), dtype=cndsd_matrix.dtype)
    for y in range(n):
        if y == x:
            row[y] = 0
            continue
        row[y] = square_idx_to_condensed(cndsd_matrix, x, y, n)
    return row


def square_rows_idx_to_condensed_rows(cndsd_matrix, indexes, n):
    """Gather several square-matrix rows out of a condensed matrix.

    For each entry of ``indexes`` the corresponding row of the n x n square
    matrix is rebuilt from ``cndsd_matrix`` and stored as one column of the
    result, which has shape ``(n, len(indexes))``.
    """
    out = np.empty(shape=(n, len(indexes)), dtype=cndsd_matrix.dtype)
    for col, square_idx in enumerate(indexes):
        out[:, col] = square_row_idx_to_condensed_row(cndsd_matrix,
                                                      square_idx, n)
    return out


if __name__ == "__main__":
    # Demo: print row ``x`` of the full Euclidean distance matrix, then the
    # same row rebuilt from scipy's condensed form.
    data = load_iris().data[:5, :]
    euclidean = DistanceMetric.get_metric("euclidean")
    distance_matrix = euclidean.pairwise(data)
    distance_matrix_condensed = scipy.spatial.distance.pdist(data, "euclidean")

    x, y = 2, None
    n_points = distance_matrix.shape[0]
    print(distance_matrix[x, :])
    rebuilt_row = square_row_idx_to_condensed_row(distance_matrix_condensed,
                                                  x,
                                                  n=n_points)
    print(rebuilt_row)