def test_input_data_size():
    """Regression test for #6288.

    Previously, a metric requiring a particular input dimension would fail.
    The callable below asserts that each vector it receives has exactly
    three components, so the test fails if ``pyfunc`` hands it anything else.
    """
    def custom_metric(x, y):
        assert x.shape[0] == 3
        return np.sum((x - y) ** 2)

    rng = check_random_state(0)
    X = rng.rand(10, 3)

    # The callable under test must be ``custom_metric``; it returns the
    # *squared* Euclidean distance, hence the ``** 2`` on the reference.
    pyfunc = DistanceMetric.get_metric("pyfunc", func=custom_metric)
    eucl = DistanceMetric.get_metric("euclidean")
    assert_array_almost_equal(pyfunc.pairwise(X), eucl.pairwise(X) ** 2)
def test_pyfunc_metric():
    """Check that a callable ``pyfunc`` metric agrees with ``euclidean``.

    The callable is a p-norm of the difference, evaluated with ``p=2``.
    """
    def dist_func(x1, x2, p):
        powered = (x1 - x2) ** p
        return np.sum(powered) ** (1. / p)

    points = np.random.random((10, 3))

    builtin_metric = DistanceMetric.get_metric("euclidean")
    callable_metric = DistanceMetric.get_metric("pyfunc", func=dist_func, p=2)

    from_builtin = builtin_metric.pairwise(points)
    from_callable = callable_metric.pairwise(points)

    assert_allclose(from_builtin, from_callable)
def get_distance(self, data):
    """Compute pairwise distances over ``data`` with the configured metric.

    The metric is selected by ``self.distance_metric``; metric parameters are
    read from ``self.kwargs`` (keys ``'VI'``, ``'P'``, ``'W'``, ``'V'``,
    ``'FUNC'``) and cached on the instance as ``vi``, ``p``, ``w``, ``v`` and
    ``func`` respectively.

    Returns whatever the parent class' ``compute_distance`` returns for the
    pairwise matrix and ``data.index``.

    Raises:
        ValueError: if ``self.distance_metric`` is not one of the supported
            ``SimilarityTerms`` values.
        KeyError: if a parameter required by the metric is missing from
            ``self.kwargs``.
    """
    metric = self.distance_metric
    parameter_free = (
        SimilarityTerms.EUCLIDEAN,
        SimilarityTerms.CHEBYSHEV,
        SimilarityTerms.MANHATTAN,
    )

    if metric in parameter_free:
        dist = DistanceMetric.get_metric(metric)
    elif metric == SimilarityTerms.MAHALAOBIS:
        self.vi = self.kwargs['VI']
        dist = DistanceMetric.get_metric(metric, VI=self.vi)
    elif metric == SimilarityTerms.MINKOWSKI:
        self.p = self.kwargs['P']
        # DistanceMetric expects the lowercase keyword ``p``.
        dist = DistanceMetric.get_metric(metric, p=self.p)
    elif metric == SimilarityTerms.WMINKOWSKI:
        self.p = self.kwargs['P']
        self.w = self.kwargs['W']
        # DistanceMetric expects the lowercase keywords ``p`` and ``w``.
        dist = DistanceMetric.get_metric(metric, p=self.p, w=self.w)
    elif metric == SimilarityTerms.SEUCLIDEAN:
        self.v = self.kwargs['V']
        dist = DistanceMetric.get_metric(metric, V=self.v)
    elif metric == SimilarityTerms.CUSTOM:
        self.func = self.kwargs['FUNC']
        dist = DistanceMetric.get_metric(metric='pyfunc', func=self.func)
    else:
        # Fail with a clear message instead of an unbound-local error below.
        raise ValueError(
            "Unsupported distance metric: {!r}".format(metric))

    distance = dist.pairwise(data)
    return super().compute_distance(distance, data.index)
def get_distance(self, d_profile_data):
    """Return the pairwise distances between the rows of a day profile.

    Missing values in ``d_profile_data`` are treated as 0. Depending on
    ``self.distance_metric`` the distances come from scikit-learn
    (``'euclidean'``, ``'mahalanobis'`` with ``self.VI``) or, for
    ``'self-defined'``, from ``self.stat_util.get_statistic_distance``
    evaluated on every ordered pair of rows.

    Returns:
        A long-format ``DataFrame`` with columns ``x``, ``y`` (row labels of
        ``d_profile_data``) and ``distance``, excluding pairs where both
        labels are equal, sorted by ascending distance. NaN distances sort
        last and are then reported as 0.

    Raises:
        ValueError: if ``self.distance_metric`` is not a supported name.
    """
    # ``DataFrame.as_matrix`` was removed from pandas; ``.values`` is the
    # equivalent that works on old and new versions alike.
    data = d_profile_data.fillna(0).values
    data_size = data.shape[0]

    if self.distance_metric != 'self-defined':
        if self.distance_metric == 'euclidean':
            dist = DistanceMetric.get_metric('euclidean')
        elif self.distance_metric == 'mahalanobis':
            dist = DistanceMetric.get_metric('mahalanobis', VI=self.VI)
        else:
            raise ValueError(
                "Unsupported distance metric: {!r}".format(
                    self.distance_metric))
        distance = dist.pairwise(data)
    else:
        distance = np.empty((data_size, data_size))
        cols = d_profile_data.columns
        # Only the 'window-usage' mode forwards the window argument.
        extra = {}
        if self.mode == 'window-usage':
            extra['window'] = self.window
        for i in range(data_size):
            for j in range(data_size):
                distance[i, j] = self.stat_util.get_statistic_distance(
                    data[i, :], data[j, :], index=cols, mode=self.mode,
                    **extra)

    labels = d_profile_data.index
    square = pd.DataFrame(distance, index=labels, columns=labels)

    # Flatten the square matrix: "x" is the row label, "y" the column label.
    x, y = np.meshgrid(square.index, square.columns)
    pairs = pd.DataFrame(
        {"x": y.ravel(), "y": x.ravel(), "distance": square.values.ravel()},
        columns=["x", "y", "distance"])

    pairs = pairs[pairs["x"] != pairs["y"]]
    pairs = pairs.sort_values('distance')
    # Assign the column as a whole; chained ``df.distance.loc[...] = 0`` is
    # not guaranteed to write through to the frame.
    pairs["distance"] = pairs["distance"].fillna(0)
    return pairs
def experiment_setup(sat_pos, altitude, src_pos, gst_pos, min_elev, orbits,
                     sat_per_orbit, terrestrial_gst_graph, path_control):
    """Compute the distance structures needed for an experiment.

    ``path_control`` is forwarded as the third positional argument of
    ``src_nearest_gst_distance``.

    Returns:
        ``(src_gst_ind, src_gst_dist, gst_gst_terrestrial,
        gst_gst_satellite)``.
    """
    inter_sat_dist = compute_sat_sat_distance(
        sat_pos, altitude, orbits, sat_per_orbit)

    # Haversine ball tree over the satellites (positions given in degrees);
    # used for nearest-satellite queries.
    haversine = DistanceMetric.get_metric("haversine")
    satellite_tree = BallTree(np.deg2rad(sat_pos), metric=haversine)

    # Satellites reachable from each ground station, with their distances.
    reachable_ind, reachable_dist = compute_gst_sat_distance(
        altitude, min_elev, gst_pos, satellite_tree)

    # Terrestrial nearest ground stations for every source.
    src_gst_ind, src_gst_dist = src_nearest_gst_distance(
        src_pos, gst_pos, path_control)

    # Ground-station to ground-station distance over the terrestrial graph.
    gst_gst_terrestrial = gst_gst_terrestrial_distance(
        terrestrial_gst_graph, gst_pos)

    # Ground-station to ground-station distance over the satellites.
    gst_gst_satellite = gsts_optimization(
        reachable_ind, reachable_dist, inter_sat_dist,
        n_gsts=gst_pos.shape[0])

    return src_gst_ind, src_gst_dist, gst_gst_terrestrial, gst_gst_satellite
def src_nearest_gst_distance(src_pos, gst_pos, nn=1):
    """Find the ``nn`` nearest ground stations of every source.

    Positions are converted from degrees to radians and queried in a
    haversine ball tree. The returned distances are converted with
    ``haversine_to_km`` and INCLUDE the fiber path stretch
    (``FIBER_PATH_STRETCH``).

    Returns:
        ``(indices, distances)`` as produced by the tree query.
    """
    haversine = DistanceMetric.get_metric("haversine")
    station_tree = BallTree(np.deg2rad(gst_pos), metric=haversine)

    angular_dist, nearest_ind = station_tree.query(np.deg2rad(src_pos), k=nn)
    stretched_dist = haversine_to_km(angular_dist) * FIBER_PATH_STRETCH
    return nearest_ind, stretched_dist
def check_pdist_bool(metric, D_true):
    """Assert that ``metric`` on ``X1_bool`` reproduces ``D_true``.

    Note: for the jaccard metric on scipy < 1.2.0 the NaN entries of
    ``D_true`` are overwritten with 0 in place.
    """
    computed = DistanceMetric.get_metric(metric).pairwise(X1_bool)

    # Based on https://github.com/scipy/scipy/pull/7373
    # When comparing two all-zero vectors, scipy>=1.2.0 jaccard metric
    # was changed to return 0, instead of nan.
    if metric == 'jaccard':
        if LooseVersion(scipy_version) < '1.2.0':
            nan_mask = np.isnan(D_true)
            D_true[nan_mask] = 0

    assert_array_almost_equal(computed, D_true)
def test_pyfunc_metric():
    """Compare ``pyfunc`` against ``euclidean``, before and after pickling."""
    points = np.random.random((10, 3))

    builtin_metric = DistanceMetric.get_metric("euclidean")
    callable_metric = DistanceMetric.get_metric("pyfunc", func=dist_func, p=2)

    # Both a predefined metric and one built from a callable must survive a
    # pickle round trip.
    builtin_restored = pickle.loads(pickle.dumps(builtin_metric))
    callable_restored = pickle.loads(pickle.dumps(callable_metric))

    direct = (builtin_metric.pairwise(points),
              callable_metric.pairwise(points))
    restored = (builtin_restored.pairwise(points),
                callable_restored.pairwise(points))

    assert_array_almost_equal(direct[0], direct[1])
    assert_array_almost_equal(restored[0], restored[1])
def test_kd_tree_two_point(dualtree):
    """Check ``KDTree.two_point_correlation`` against a brute-force count."""
    n_samples = 100
    n_features = 3
    rng = check_random_state(0)
    X = rng.random_sample((n_samples, n_features))
    Y = rng.random_sample((n_samples, n_features))
    radii = np.linspace(0, 1, 10)

    tree = KDTree(X, leaf_size=10)

    # Reference: number of (Y, X) pairs no farther apart than each radius.
    all_dist = DistanceMetric.get_metric("euclidean").pairwise(Y, X)
    expected = []
    for radius in radii:
        expected.append((all_dist <= radius).sum())

    observed = tree.two_point_correlation(Y, r=radii, dualtree=dualtree)
    assert_array_almost_equal(observed, expected)
def test_ball_tree_two_point(n_samples=100, n_features=3):
    """Check ``BallTree.two_point_correlation`` against a brute-force count.

    Both the dual-tree and the single-tree code paths are exercised. The
    checks run directly inside the test: yield-style test generators are not
    executed by modern pytest, so yielded checks would silently never run.
    """
    np.random.seed(0)
    X = np.random.random((n_samples, n_features))
    Y = np.random.random((n_samples, n_features))
    r = np.linspace(0, 1, 10)
    bt = BallTree(X, leaf_size=10)

    # Reference: number of (Y, X) pairs no farther apart than each radius.
    D = DistanceMetric.get_metric("euclidean").pairwise(Y, X)
    counts_true = [(D <= ri).sum() for ri in r]

    for dualtree in (True, False):
        counts = bt.two_point_correlation(Y, r=r, dualtree=dualtree)
        assert_array_almost_equal(counts, counts_true)
def test_haversine_metric():
    """Compare the haversine metric with a direct formula evaluation.

    Also checks that ``dist_to_rdist`` maps a distance ``d`` to
    ``sin(d / 2) ** 2``.
    """
    def haversine_slow(x1, x2):
        first_term = np.sin(0.5 * (x1[0] - x2[0])) ** 2
        second_term = (np.cos(x1[0]) * np.cos(x2[0])
                       * np.sin(0.5 * (x1[1] - x2[1])) ** 2)
        return 2 * np.arcsin(np.sqrt(first_term + second_term))

    X = np.random.random((10, 2))

    metric = DistanceMetric.get_metric("haversine")
    fast = metric.pairwise(X)

    slow = np.zeros_like(fast)
    for row in range(X.shape[0]):
        for col in range(X.shape[0]):
            slow[row, col] = haversine_slow(X[row], X[col])

    assert_array_almost_equal(fast, slow)
    assert_array_almost_equal(metric.dist_to_rdist(fast),
                              np.sin(0.5 * slow) ** 2)
def optimize_end_to_end_latency_rerouting(sat_pos, altitude, gst_pos, src_pos,
                                          min_elev, orbits, sat_per_orbit,
                                          terrestrial_gst_graph, inactive):
    """Build the source-to-destination latency matrix with rerouting.

    Ground stations listed in ``inactive`` are mapped to their closest
    active station via ``inactive_to_closest_active``.

    Returns:
        ``(src_dst_latency, nearest_active)``.
    """
    # Distances on the satellite graph.
    inter_sat_dist = compute_sat_sat_distance(
        sat_pos, altitude, orbits, sat_per_orbit)

    # Haversine ball tree over the satellites (positions given in degrees);
    # used for nearest-satellite queries.
    haversine = DistanceMetric.get_metric("haversine")
    satellite_tree = BallTree(np.deg2rad(sat_pos), metric=haversine)

    # Satellites reachable from each ground station, with their distances.
    reachable_ind, reachable_dist = compute_gst_sat_distance(
        altitude, min_elev, gst_pos, satellite_tree)

    # Ground-station to ground-station distance over the terrestrial graph.
    gst_gst_terrestrial = gst_gst_terrestrial_distance(
        terrestrial_gst_graph, gst_pos)

    # Ground-station to ground-station distance over the satellites.
    gst_gst_satellite = gsts_optimization(
        reachable_ind, reachable_dist, inter_sat_dist,
        n_gsts=gst_pos.shape[0])

    # Closest active ground station for every inactive one.
    nearest_active, nearest_active_dist = inactive_to_closest_active(
        inactive, gst_gst_terrestrial)

    # Closest ground station to every source, and its distance.
    src_gst_ind, src_gst_dist = src_nearest_gst_distance(src_pos, gst_pos)

    # Combine everything into the source-destination matrix.
    src_dst_latency = compute_src_dst_latency(
        src_pos.shape[0], inactive, src_gst_ind, src_gst_dist,
        nearest_active, nearest_active_dist, gst_gst_satellite)

    return src_dst_latency, nearest_active
def check_pickle(metric, kwargs):
    """Assert that a metric yields the same distances after a pickle round trip."""
    original = DistanceMetric.get_metric(metric, **kwargs)
    before = original.pairwise(X1)

    restored = pickle.loads(pickle.dumps(original))
    after = restored.pairwise(X1)

    assert_array_almost_equal(before, after)
def plot_absolute(src_gst_latency, src_src_latency, src_pos):
    """Plot average latency against great-circle source distance.

    Two measured curves are drawn (city-city and IXP-city averages from
    ``vector_map_statistics``) together with three reference lines derived
    from ``LIGHT_IN_FIBER``, ``LIGHT_IN_VACUUM`` and ``FIBER_PATH_STRETCH``.
    The figure is written to ``figures/latency-distance.png``.

    Only the strict upper triangle of the latency matrices is used, i.e.
    each unordered source pair once.
    """
    # NOTE(review): values are multiplied by 1e3 while the axis label says
    # seconds — confirm the intended unit.
    SCALING = 1e3

    triu = np.triu_indices(src_gst_latency.shape[0], 1)
    ixp_routed = np.around(src_gst_latency[triu], 6)
    city_gst = np.around(src_src_latency[triu], 6)

    plt.figure(figsize=(8, 6))

    # Call ``pairwise`` on the metric instance rather than unbound on the
    # ``DistanceMetric`` base class, which does not expose it everywhere.
    src_rad = np.deg2rad(src_pos)
    haversine = DistanceMetric.get_metric("haversine")
    pairwise_src = haversine.pairwise(src_rad, src_rad) * EARTH_RADIUS
    pairwise = pairwise_src[triu]

    vals, avg_c, _, _, _ = vector_map_statistics(pairwise, city_gst, 10)
    plt.plot(vals, np.asarray(avg_c) * SCALING,
             label="Average city-city", linewidth=3)

    vals, avg_g, _, _, _ = vector_map_statistics(pairwise, ixp_routed, 10)
    plt.plot(vals, np.asarray(avg_g) * SCALING,
             label="Average IXP-city", linewidth=3)

    # Reference lines.
    plt.plot(vals, vals / LIGHT_IN_FIBER * SCALING, ':', linewidth=3,
             label="Great-circle in fiber")
    plt.plot(vals, vals / LIGHT_IN_VACUUM * SCALING, '--',
             label="Great-circle in vacuum", linewidth=3)
    plt.plot(vals, vals * FIBER_PATH_STRETCH / LIGHT_IN_FIBER * SCALING,
             '-.', label="Path-stretch in fiber", linewidth=3)

    plt.ylim(0, 150)
    plt.xlim(0, np.max(vals))

    plt.xlabel("SRC-DST great-circle distance (km)")
    plt.ylabel("One-way latency (s)")
    plt.legend(loc=9, ncol=2, mode="expand")

    # Save figures
    # plt.savefig("figures/latency-distance.pdf")
    plt.savefig("figures/latency-distance.png")
def plot_relative(src_gst_latency, src_src_latency, src_pos):
    """Plot the relative latency difference of IXP routing vs. city routing.

    The percentage ``(ixp - city) / city * 100`` is summarised by
    ``vector_map_statistics`` and drawn twice: on a log-scale axis (top,
    y-range 90..1000) and on a linear axis (bottom, y-range -50..90). The
    figure is written to ``figures/percent-ixp-loss.png``.

    Only the strict upper triangle of the latency matrices is used, i.e.
    each unordered source pair once.
    """
    triu = np.triu_indices(src_gst_latency.shape[0], 1)
    ixp_routed = np.around(src_gst_latency[triu], 6)
    city_gst = np.around(src_src_latency[triu], 6)

    # Call ``pairwise`` on the metric instance rather than unbound on the
    # ``DistanceMetric`` base class, which does not expose it everywhere.
    src_rad = np.deg2rad(src_pos)
    haversine = DistanceMetric.get_metric("haversine")
    pairwise_src = haversine.pairwise(src_rad, src_rad) * EARTH_RADIUS

    relative_loss = (ixp_routed - city_gst) / city_gst * 100

    pairwise = pairwise_src[triu]
    # ``quartiles`` is indexed by the requested percentiles (25 and 75).
    vals, avg_c, min_c, max_c, quartiles = vector_map_statistics(
        pairwise, relative_loss, 100, [25, 75])

    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
    cur_color = colors[0]

    plt.figure(figsize=(8, 6))
    gs1 = gridspec.GridSpec(2, 1)
    gs1.update(wspace=0.01, hspace=0.11)  # set the spacing between axes.
    axes = [plt.subplot(gs1[0]), plt.subplot(gs1[1])]

    # Top panel: log scale.
    axes[0].axhline(0, c='grey', linewidth=0.5)
    axes[0].semilogy(vals, avg_c, label="Average", c=cur_color, linewidth=3)
    axes[0].semilogy(vals, quartiles[25], "--", label="Quartiles",
                     c=cur_color, linewidth=3)
    axes[0].semilogy(vals, quartiles[75], "--", c=cur_color, linewidth=3)
    axes[0].plot(vals, max_c, ":", linewidth=3, label="Min-max variability",
                 c=cur_color)
    axes[0].set_ylim(90, 1000)
    axes[0].set_ylabel("log-scale")
    axes[0].set_xticks([])
    axes[0].legend()

    # Bottom panel: linear scale.
    axes[1].axhline(0, c='grey', linewidth=0.5)
    axes[1].plot(vals, avg_c, label="Average city-city", c=cur_color,
                 linewidth=3)
    axes[1].plot(vals, quartiles[25], "--", label="Quartiles", c=cur_color,
                 linewidth=3)
    axes[1].plot(vals, quartiles[75], "--", c=cur_color, linewidth=3)
    axes[1].plot(vals, max_c, ":", linewidth=3, label="Min-max variability",
                 c=cur_color)
    axes[1].plot(vals, min_c, ":", linewidth=3, label="Min-max variability",
                 c=cur_color)
    axes[1].set_xlabel("SRC-DST great-circle distance (km)")
    axes[1].set_ylabel("Loss IXP deployment (%)")
    # axes[1].set_ylabel("Latency increase IXP deployment (%)")
    axes[1].set_ylim(-50, 90)

    plt.savefig("figures/percent-ixp-loss.png")
def test_pickle_bool_metrics(metric):
    """Assert that a boolean metric is unchanged by a pickle round trip."""
    original = DistanceMetric.get_metric(metric)
    before = original.pairwise(X1_bool)

    restored = pickle.loads(pickle.dumps(original))
    after = restored.pairwise(X1_bool)

    assert_array_almost_equal(before, after)
def check_cdist_bool(self, metric, D_true):
    """Assert that ``metric`` between the boolean fixtures equals ``D_true``."""
    computed = DistanceMetric.get_metric(metric).pairwise(
        self.X1_bool, self.X2_bool)
    assert_allclose(computed, D_true)
def test_compute_loss_condensed(data, distance_matrix,
                                distance_matrix_condensed, components,
                                fuzzifier):
    """Check the condensed-matrix loss against the square-matrix loss.

    Memberships and medoids are derived from the square distance matrix, and
    both loss implementations are evaluated on those same values.
    """
    initial_medoids = random_choice_idx(data, components=components)
    memberships = _compute_memberships_square(
        distance_matrix, initial_medoids, fuzzifier)
    medoids = _compute_medoids_square(distance_matrix, memberships, fuzzifier)

    expected_loss = _compute_loss_square(
        distance_matrix, medoids, memberships, fuzzifier)
    condensed_loss = _compute_loss_condensed(
        distance_matrix_condensed, medoids, memberships, fuzzifier,
        n=data.shape[0])

    assert np.isclose(expected_loss, condensed_loss)


if __name__ == "__main__":
    # Run every check on the first five iris samples for 1000 seeds.
    components = 3
    fuzzifier = 2.0
    data = load_iris().data[:5, :]
    distance_matrix = DistanceMetric.get_metric("euclidean").pairwise(data)
    distance_matrix_condensed = scipy.spatial.distance.pdist(data, "euclidean")

    square_checks = (
        test_compute_memberships_square,
        test_compute_medoids_square,
        test_compute_loss_square,
    )
    condensed_checks = (
        test_compute_memberships_condensed,
        test_compute_medoids_condensed,
        test_compute_loss_condensed,
    )

    for seed in range(1000):
        print("Seed", seed)
        set_manual_seed(seed)
        for check in square_checks:
            check(data, distance_matrix, components, fuzzifier)
        for check in condensed_checks:
            check(data, distance_matrix, distance_matrix_condensed,
                  components, fuzzifier)
def compute_distances():
    """Compute source latencies with and without routing through IXPs.

    Locations are loaded from the two CSV files under ``data/raw``; sources
    and ground stations are sorted by longitude, and sources above 56
    degrees latitude are dropped.

    Returns:
        ``(src_gst_latency, src_src_latency, src_pos)``.
    """
    # Constellation parameters.
    altitude, min_elev = 1150, 40
    orbits, sat_per_orbit, inclination = 32, 50, 53

    # Load IXP-GST positions and source cities.
    ixp_csv = "data/raw/ixp_geolocation.csv"
    cities_csv = "data/raw/WUP2018-F22-Cities_Over_300K_Annual.csv"
    sat_pos, gst_pos, src_pos = load_locations(
        altitude, orbits, sat_per_orbit, inclination, ixp_csv, cities_csv,
        time=15000)

    src_pos = src_pos[np.argsort(src_pos[:, 1])]

    # Remove SRCs that are too high in latitude
    too_far_north = np.where(src_pos[:, 0] > 56)[0]
    src_pos = np.delete(src_pos, too_far_north, axis=0)

    gst_pos = gst_pos[np.argsort(gst_pos[:, 1])]

    # %%
    inter_sat_dist = compute_sat_sat_distance(
        sat_pos, altitude, orbits, sat_per_orbit)

    # Haversine ball tree over the satellites; used for nearest-satellite
    # queries.
    haversine = DistanceMetric.get_metric("haversine")
    satellite_tree = BallTree(np.deg2rad(sat_pos), metric=haversine)

    # Sources acting as ground stations: reachable satellites and distances.
    city_sat_ind, city_sat_dist = compute_gst_sat_distance(
        altitude, min_elev, src_pos, satellite_tree)
    src_src_satellite = gsts_optimization(
        city_sat_ind, city_sat_dist, inter_sat_dist,
        n_gsts=src_pos.shape[0])
    src_src_latency = src_src_satellite / LIGHT_IN_VACUUM

    # %%
    ixp_sat_ind, ixp_sat_dist = compute_gst_sat_distance(
        altitude, min_elev, gst_pos, satellite_tree)
    gst_gst_satellite = gsts_optimization(
        ixp_sat_ind, ixp_sat_dist, inter_sat_dist,
        n_gsts=gst_pos.shape[0])

    src_gst_ind, src_gst_dist = src_nearest_gst_distance(src_pos, gst_pos)
    src_gst_latency = compute_src_dst_latency(
        src_pos.shape[0], [], src_gst_ind, src_gst_dist, [], [],
        gst_gst_satellite)

    return src_gst_latency, src_src_latency, src_pos
def check_cdist_bool(metric, D_true):
    """Assert that ``metric`` between ``X1_bool`` and ``X2_bool`` equals ``D_true``."""
    computed = DistanceMetric.get_metric(metric).pairwise(X1_bool, X2_bool)
    assert_array_almost_equal(computed, D_true)
def check_cdist(metric, kwargs, D_true):
    """Assert that ``metric(**kwargs)`` between ``X1`` and ``X2`` equals ``D_true``."""
    computed = DistanceMetric.get_metric(metric, **kwargs).pairwise(X1, X2)
    assert_array_almost_equal(computed, D_true)
def check_cdist(self, metric, kwargs, D_true):
    """Assert that ``metric(**kwargs)`` between the fixtures equals ``D_true``.

    The canberra metric is skipped when ``cmp_version`` reports the
    installed scipy as not newer than 0.9.
    """
    if metric == 'canberra':
        if cmp_version(scipy.__version__, '0.9') <= 0:
            raise SkipTest("Canberra distance incorrect in scipy < 0.9")

    computed = DistanceMetric.get_metric(metric, **kwargs).pairwise(
        self.X1, self.X2)
    assert_array_almost_equal(computed, D_true)
def _fetch_audio_features(ids):
    """Return the ``audio_features`` list of the Spotify response for ``ids``.

    Entries may be falsy placeholders; callers are expected to skip those.
    """
    endpoint = SPOTIFY_API_URL + "/audio-features?ids=" + ",".join(ids)
    response = requests.get(endpoint,
                            headers={'Authorization': SPOTIFY_TOKEN})
    return response.json()['audio_features']


def recommend(track_ids, artist_ids):
    """Recommend tracks close to the average audio profile of ``track_ids``.

    The audio features of the seed tracks are averaged into a "phantom"
    track, which is sent as the target of a Spotify recommendations request
    seeded with up to five of ``artist_ids``. Recommended tracks are
    de-duplicated, stripped of the seed tracks, and ordered by Euclidean
    distance between their audio features and the phantom track.

    Returns:
        A dict with the parallel lists ``ids``, ``data``, ``artists``,
        ``images``, ``titles``, ``attributes`` and ``distances``, all sorted
        by ascending distance. ``data`` holds only the tracks that were
        kept, so every list has one entry per recommended track.

    Raises:
        ValueError: if none of the seed tracks has audio features.
    """
    target_attributes = [
        'energy', 'liveness', 'tempo', 'speechiness', 'acousticness',
        'instrumentalness', 'danceability', 'loudness'
    ]

    # Average the features of the seed tracks, skipping missing entries.
    seed_features = [t for t in _fetch_audio_features(track_ids) if t]
    if not seed_features:
        raise ValueError("no audio features available for the seed tracks")

    phantom_average_track = {}
    seed_count = float(len(seed_features))
    for attribute in target_attributes:
        total = sum(track[attribute] for track in seed_features)
        phantom_average_track[attribute] = total / seed_count

    # Build the recommendations query, one target_* parameter per attribute.
    query_parts = ["seed_artists=" + ",".join(list(artist_ids)[:5])]
    for attribute in target_attributes:
        rounded = str(round(phantom_average_track[attribute], 2))
        query_parts.append("target_" + attribute + "=" + rounded)
    query_parts.append("limit=20")
    recommendations_api_endpoint = (
        SPOTIFY_API_URL + "/recommendations?" + "&".join(query_parts))

    recommendations_response = requests.get(
        recommendations_api_endpoint,
        headers={'Authorization': SPOTIFY_TOKEN})

    # Keep each recommended track once and drop the seed tracks themselves.
    kept_tracks = []
    seen_ids = set()
    for track in recommendations_response.json()['tracks']:
        track_id = track['id']
        if track_id in seen_ids or track_id in track_ids:
            continue
        seen_ids.add(track_id)
        kept_tracks.append(track)

    recommendation_data = dict(
        ids=[track['id'] for track in kept_tracks],
        # Only the kept tracks, so that 'data' stays aligned with the other
        # lists when everything is reordered below.
        data=kept_tracks,
        artists=[track['artists'][0]['name'] for track in kept_tracks],
        images=[track['album']['images'][0]['url'] for track in kept_tracks],
        titles=[track['name'] for track in kept_tracks],
        attributes=[],
        distances=[],
    )
    if not kept_tracks:
        return recommendation_data

    # Assumes one entry per requested id, in request order — TODO confirm.
    features = _fetch_audio_features(recommendation_data['ids'])
    recommendation_data['attributes'] = features

    # Explicit lists in ``target_attributes`` order: dict views cannot be
    # handed to ``pairwise`` and would not guarantee a column order.
    target_row = [phantom_average_track[a] for a in target_attributes]
    feature_rows = []
    row_positions = []
    for position, track_features in enumerate(features):
        if track_features:
            feature_rows.append(
                [track_features[a] for a in target_attributes])
            row_positions.append(position)

    # Tracks without audio features sort last.
    distances = [float('inf')] * len(kept_tracks)
    if feature_rows:
        dist = DistanceMetric.get_metric('euclidean')
        # Row 0 holds the distances from the phantom track to every row;
        # its first entry is the distance to itself.
        to_target = dist.pairwise([target_row] + feature_rows)[0][1:]
        for position, value in zip(row_positions, to_target):
            distances[position] = value

    order = sorted(range(len(distances)), key=distances.__getitem__)
    for key in ('ids', 'data', 'artists', 'images', 'titles', 'attributes'):
        values = recommendation_data[key]
        recommendation_data[key] = [values[i] for i in order]
    recommendation_data['distances'] = [distances[i] for i in order]

    return recommendation_data
def get_distance(self, data):
    """Compute pairwise distances over ``data`` using ``self.deep_metric``.

    ``self.deep_metric`` is wrapped as a ``pyfunc`` metric; the resulting
    matrix and ``data.index`` are passed to ``self.compute_distance``.
    """
    custom_metric = DistanceMetric.get_metric(metric='pyfunc',
                                              func=self.deep_metric)
    pairwise_matrix = custom_metric.pairwise(data)
    return self.compute_distance(pairwise_matrix, data.index)
def check_pdist_bool(self, metric, D_true):
    """Assert that ``metric`` on the boolean fixture equals ``D_true``."""
    computed = DistanceMetric.get_metric(metric).pairwise(self.X1_bool)
    assert_allclose(computed, D_true)
def check_cdist(self, metric, kwargs, D_true):
    """Assert that ``metric(**kwargs)`` between the fixtures equals ``D_true``."""
    computed = DistanceMetric.get_metric(metric, **kwargs).pairwise(
        self.X1, self.X2)
    assert_array_almost_equal(computed, D_true)
def check_pdist(self, metric, kwargs, D_true):
    """Assert that ``metric(**kwargs)`` on the fixture equals ``D_true``.

    The canberra metric is skipped when ``cmp_version`` reports the
    installed scipy as not newer than 0.9.
    """
    if metric == 'canberra':
        if cmp_version(scipy.__version__, '0.9') <= 0:
            raise SkipTest("Canberra distance incorrect in scipy < 0.9")

    computed = DistanceMetric.get_metric(metric, **kwargs).pairwise(self.X1)
    assert_allclose(computed, D_true)
def check_pdist(self, metric, kwargs, D_true):
    """Assert that ``metric(**kwargs)`` on the fixture equals ``D_true``."""
    computed = DistanceMetric.get_metric(metric, **kwargs).pairwise(self.X1)
    assert_array_almost_equal(computed, D_true)
def check_pdist(metric, kwargs, D_true):
    """Assert that ``metric(**kwargs)`` on ``X1`` equals ``D_true``."""
    computed = DistanceMetric.get_metric(metric, **kwargs).pairwise(X1)
    assert_array_almost_equal(computed, D_true)
def check_pdist_bool(self, metric, D_true):
    """Assert that ``metric`` on the boolean fixture equals ``D_true``."""
    computed = DistanceMetric.get_metric(metric).pairwise(self.X1_bool)
    assert_array_almost_equal(computed, D_true)
def brute_force_neighbors(X, Y, k, metric, **kwargs):
    """Find the ``k`` nearest rows of ``X`` for every row of ``Y``.

    The full ``Y``-to-``X`` distance matrix is computed and each row sorted.

    Returns:
        ``(dist, ind)``: the distances to, and the ``X`` indices of, the
        ``k`` nearest neighbours, one row per row of ``Y``.
    """
    distance_metric = DistanceMetric.get_metric(metric, **kwargs)
    all_dist = distance_metric.pairwise(Y, X)

    nearest = np.argsort(all_dist, axis=1)[:, :k]
    # Column vector of row numbers, broadcast against the index matrix.
    row_ids = np.arange(Y.shape[0])[:, None]
    return all_dist[row_ids, nearest], nearest
def check_cdist(self, metric, kwargs, D_true):
    """Assert that ``metric(**kwargs)`` between the fixtures equals ``D_true``.

    The canberra metric is skipped when ``cmp_version`` reports the
    installed scipy as not newer than 0.9.
    """
    if metric == "canberra":
        if cmp_version(scipy.__version__, "0.9") <= 0:
            raise SkipTest("Canberra distance incorrect in scipy < 0.9")

    computed = DistanceMetric.get_metric(metric, **kwargs).pairwise(
        self.X1, self.X2)
    assert_array_almost_equal(computed, D_true)
def check_pickle_bool(self, metric):
    """Assert that a boolean metric is unchanged by a pickle round trip."""
    original = DistanceMetric.get_metric(metric)
    before = original.pairwise(self.X1_bool)

    restored = pickle.loads(pickle.dumps(original))
    after = restored.pairwise(self.X1_bool)

    assert_array_almost_equal(before, after)
def check_pickle(self, metric, kwargs):
    """Assert that ``metric(**kwargs)`` is unchanged by a pickle round trip."""
    original = DistanceMetric.get_metric(metric, **kwargs)
    before = original.pairwise(self.X1)

    restored = pickle.loads(pickle.dumps(original))
    after = restored.pairwise(self.X1)

    assert_array_almost_equal(before, after)
# Attribute-based variant of the distance below, kept for reference:
# def dist_func(a, b):
#     alpha = 1
#     return np.sqrt((a.x - b.x)**2 +
#                    (a.y - b.y)**2 +
#                    alpha*(a.theta - b.theta)**2)


def dist_func(a, b):
    """Return the weighted Euclidean distance between two 3-component states.

    The third component's squared difference is weighted by ``alpha``
    (currently 1).
    """
    alpha = 1
    return np.sqrt((a[0] - b[0])**2 +
                   (a[1] - b[1])**2 +
                   alpha * (a[2] - b[2])**2)


# NOTE(review): ``pyfunc`` is built but the tree below uses "euclidean".
pyfunc = DistanceMetric.get_metric("pyfunc", func=dist_func)

model_states, X = make_states()
for i in range(X.shape[0]):
    print(X[i, :])

print("TREE TIME")
tree = KDTree(X, leaf_size=4, metric="euclidean")
pts = np.array([(0, 0, 0)])
dist, ind = tree.query(pts, k=1)
for i in ind:
    print(X[i])
    # ``np.asscalar`` was removed from NumPy; ``ndarray.item()`` is the
    # direct replacement.
    print(i.item())
    print(model_states[i.item()])

# print(dist)
# print(KDTree.valid_metrics)
# a = np.empty((5, 5, 3))