Example #1
0
def test_load_dexter():
    """Check that ``load_dexter`` returns data with the expected shapes.

    ``X`` is expected to have shape ``(300, 20_000)`` and ``y`` shape
    ``(300,)``.
    """
    n_samples, n_features = 300, 20_000
    expected_X_shape = (n_samples, n_features)
    expected_y_shape = (n_samples,)

    X, y = load_dexter()

    assert X.shape == expected_X_shape, \
        f'Wrong shape: X.shape = {X.shape}, should be (300, 20_000).'
    assert y.shape == expected_y_shape, \
        f'Wrong shape: y.shape = {y.shape}, should be (300, ).'
Example #2
0
def dexter():
    """Load the dexter data and recode its labels as non-negative integers.

    The labels are cast to ``np.int64`` and every ``-1`` label is replaced
    by ``0``. The features, the labels before and after recoding, and the
    per-label counts are printed along the way.

    Returns
    -------
    tuple
        ``(X, y)``: ``X`` exactly as returned by ``load_dexter`` and ``y``
        the recoded ``np.int64`` labels.
    """
    X, raw_labels = load_dexter()
    print(X)
    print(raw_labels)

    # np.bincount only accepts non-negative integers, hence the cast and
    # the -1 -> 0 recoding before counting.
    labels = raw_labels.astype(np.int64)
    is_negative_class = labels == -1
    labels[is_negative_class] = 0
    print(labels)

    label_counts = np.bincount(labels)
    print('Dexter bin counts', label_counts)
    # TODO:
    # pd.to_csv()
    return X, labels
def test_neighbors_dexter(hubness_param, metric):
    """Check that hubness reduction lowers k-occurrence skewness on dexter.

    ``hubness_param`` is unpacked into a ``(hubness, params)`` pair that is
    forwarded to ``kneighbors_graph`` as ``hubness`` and ``hubness_params``.
    ``metric`` is used both for the baseline ``Hubness`` estimate and for
    building the neighbor graph. The test passes when the skewness measured
    on the graph is below 80% of the baseline skewness.
    """
    n_neighbors = 10
    hubness, hubness_params = hubness_param
    X, _ = load_dexter()

    # Baseline: hubness measured directly on the data with the given metric.
    baseline = Hubness(k=n_neighbors, metric=metric)
    baseline.fit(X)
    skew_before = baseline.score()

    # Hubness measured on the neighbor graph built with hubness reduction.
    reduced_graph = kneighbors_graph(
        X,
        n_neighbors=n_neighbors,
        metric=metric,
        hubness=hubness,
        hubness_params=hubness_params,
    )
    reduced = Hubness(k=n_neighbors, metric='precomputed')
    reduced.fit(reduced_graph)
    skew_after = reduced.score(has_self_distances=True)

    # Require at least a 20% reduction relative to the baseline.
    max_allowed_skew = skew_before * 8/10
    assert skew_after < max_allowed_skew,\
        f'k-occurrence skewness was not reduced by at least 20% for dexter with {hubness}'
Example #4
0
# d_mle = hub_toolbox.intrinsic_dimension.intrinsic_dimension(vectors)
# # vectors = vectors[:10000, :]
# # d_mle = hub_toolbox.intrinsic_dimension.intrinsic_dimension(vectors)
# # vectors = mnist.data
# # vectors = vectors[:10000, :]
# # d_mle = hub_toolbox.intrinsic_dimension.intrinsic_dimension(vectors)
# D = euclidean_distance(vectors)
#
# S_k, _, _ = hub_toolbox.hubness.hubness(D=D, k=5, metric='distance')
# D_mp = hub_toolbox.global_scaling.mutual_proximity_empiric(
#         D=D, metric='distance')
# S_k_mp, _, _ = hub_toolbox.hubness.hubness(D=D_mp, k=5, metric='distance')
#
# print(S_k, S_k_mp)

from skhubness import Hubness
from skhubness.data import load_dexter
from skhubness.neighbors import kneighbors_graph

# Load the dexter example data.
X, y = load_dexter()

# Fit a Hubness estimator (k=10, cosine metric) and report its score.
hub = Hubness(k=10, metric='cosine')
hub.fit(X)
k_skew = hub.score()
print(f'Skewness = {k_skew:.3f}')

# Build the k-neighbors graph without hubness reduction.
# Alternative with hubness reduction enabled:
# kneighbors_graph(X, n_neighbors=k, hubness='mutual_proximity')
k = 5
neigbor_graph = kneighbors_graph(X, n_neighbors=k, hubness=None)

# Arrange the stored column indices as one row of k entries per sample.
# NOTE(review): assumes the graph stores exactly k indices per row; the
# reshape raises otherwise.
n_samples = X.shape[0]
neighbor_matrix = neigbor_graph.indices.reshape((n_samples, k))
print(neighbor_matrix)