Example #1
import time

import numpy as np
import ncvis

def test_parallel():
    np.random.seed(42)
    X = np.random.random((10**3, 10))
    distances = ['euclidean', 'cosine', 'correlation', 'inner_product']
    n_threads = [1, 2]
    for distance in distances:
        print("Distance:", distance)
        times = {}
        for n_th in n_threads:
            vis = ncvis.NCVis(n_neighbors=15,
                              M=16,
                              ef_construction=200,
                              random_seed=42,
                              n_init_epochs=20,
                              n_epochs=50,
                              min_dist=0.4,
                              n_threads=n_th,
                              distance=distance)
            start = time.time()
            Y = vis.fit_transform(X)
            stop = time.time()
            times[n_th] = stop - start

            print("n_threads = {}, time = {:.2f}s".format(n_th, times[n_th]))
            if n_th > 1:
                eff = times[1] / (times[n_th] * n_th)
                assert eff > 0.3, "Parallelization efficiency is too low"
Example #2
import numpy as np
import ncvis

def test_1d_clustering():
    np.random.seed(42)
    n = 50
    X = np.concatenate(
        (np.random.normal(-1, 1.5, (n, 1)), np.random.normal(1, 1.5, (n, 1))))

    vis = ncvis.NCVis(n_neighbors=15,
                      M=16,
                      ef_construction=200,
                      d=1,
                      n_init_epochs=20,
                      n_epochs=50,
                      min_dist=0.4,
                      n_threads=-1,
                      distance="euclidean",
                      random_seed=42)
    Y = vis.fit_transform(X).ravel()
    n_pos = np.count_nonzero(Y - Y.mean() > 0)
    assert np.abs(n_pos - n) < 5, "Clustering quality is too poor"
Example #3
import numpy as np
import ncvis

def test_distances():
    np.random.seed(42)
    X = np.random.random((5, 3))
    distances = ['euclidean', 'cosine', 'correlation', 'inner_product']
    for distance in distances:
        vis = ncvis.NCVis(n_neighbors=15,
                          M=16,
                          ef_construction=200,
                          random_seed=42,
                          n_init_epochs=20,
                          n_epochs=50,
                          min_dist=0.4,
                          n_threads=-1,
                          distance=distance)
        Y = vis.fit_transform(X)
        all_finite = np.all(np.isfinite(Y))
        print("Distance:", distance)
        print("Input:")
        print(X)
        print("Output:")
        print(Y)
        assert all_finite, "All entries must be finite"
Example #4
    def __init__(self, d: int = 2, random_state: int = 0, **kwargs):
        import ncvis
        super().__init__(d, random_state)
        self._main = ncvis.NCVis(d=d, random_seed=random_state, **kwargs)
Example #5
def NCVis(
    data,
    n_components=2,
    n_jobs=-1,
    n_neighbors=15,
    distance="cosine",
    M=15,
    efC=30,
    random_seed=42,
    n_epochs=50,
    n_init_epochs=20,
    spread=1.0,
    min_dist=0.4,
    alpha=1.0,
    a=None,
    b=None,
    alpha_Q=1.,
    n_noise=None,
):
    """
        Runs Noise Contrastive Visualization (NCVis, https://dl.acm.org/doi/abs/10.1145/3366423.3380061)
        for dimensionality reduction and graph layout.

        Parameters
        ----------
        n_components : int
            Desired dimensionality of the embedding.
        n_jobs : int
            The maximum number of threads to use. If n_jobs < 1, it defaults to the number of available CPUs.
        n_neighbors : int
            Number of nearest neighbors to consider in the high-dimensional space.
        M : int
            The number of bi-directional links created for every new element during construction of HNSW.
            See https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md
        efC : int
            The size of the dynamic list for the nearest neighbors (used during the search) in HNSW.
            See https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md
        random_seed : int
            Random seed to initialize the generators. Notice, however, that the result may still depend on the number of threads.
        n_epochs : int
            The total number of epochs to run. During each epoch the positions of every nearest-neighbor pair are updated.
        n_init_epochs : int
            The number of epochs used for initialization. During each epoch the positions of every nearest-neighbor pair are updated.
        spread : float
            The effective scale of embedded points. In combination with ``min_dist``
            this determines how clustered/clumped the embedded points are.
            See https://github.com/lmcinnes/umap/blob/834184f9c0455f26db13ab148c0abd2d3767d968/umap/umap_.py#L1143
        min_dist : float
            The effective minimum distance between embedded points. Smaller values
            will result in a more clustered/clumped embedding where nearby points
            on the manifold are drawn closer together, while larger values will
            result in a more even dispersal of points. The value should be set
            relative to the ``spread`` value, which determines the scale at which
            embedded points will be spread out.
            See https://github.com/lmcinnes/umap/blob/834184f9c0455f26db13ab148c0abd2d3767d968/umap/umap_.py#L1135
        a : float (optional, default None)
            More specific parameters controlling the embedding. If None these values
            are set automatically as determined by ``min_dist`` and ``spread``.
            See https://github.com/lmcinnes/umap/blob/834184f9c0455f26db13ab148c0abd2d3767d968/umap/umap_.py#L1179
        b : float (optional, default None)
            More specific parameters controlling the embedding. If None these values
            are set automatically as determined by ``min_dist`` and ``spread``.
            See https://github.com/lmcinnes/umap/blob/834184f9c0455f26db13ab148c0abd2d3767d968/umap/umap_.py#L1183
        alpha : float
            Learning rate for the embedding positions.
        alpha_Q : float
            Learning rate for the normalization constant.
        n_noise : int or ndarray of ints
            Number of noise samples to use per data sample. If an ndarray is provided, n_epochs is set to its length.
            If n_noise is None, dynamic sampling is used, with the noise level gradually increasing
            from 0 to a fixed value.
        distance : str {'euclidean', 'cosine', 'correlation', 'inner_product'}
            Distance to use for nearest neighbors search.

    """

    try:
        import ncvis
    except ImportError:
        print(
            'NCVis is needed for this embedding. Install it with `pip install ncvis`'
        )
        return None

    ncvis_emb = ncvis.NCVis(d=n_components,
                            n_threads=n_jobs,
                            n_neighbors=n_neighbors,
                            M=M,
                            ef_construction=efC,
                            random_seed=random_seed,
                            n_epochs=n_epochs,
                            n_init_epochs=n_init_epochs,
                            spread=spread,
                            min_dist=min_dist,
                            a=a,
                            b=b,
                            alpha=alpha,
                            alpha_Q=alpha_Q,
                            n_noise=n_noise,
                            distance=distance).fit_transform(data)

    return ncvis_emb
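
A minimal usage sketch for the wrapper above (the random input array and its shape are illustrative assumptions, not taken from the original examples):

import numpy as np

# Illustrative data: 1000 samples with 20 features each.
data = np.random.random((1000, 20))

# Reduce to 2 dimensions with the wrapper defined above.
embedding = NCVis(data, n_components=2, n_neighbors=15, distance="cosine")
print(embedding.shape)  # expected: (1000, 2)
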
import ncvis

vis = ncvis.NCVis(n_neighbors=15,
                  M=16,
                  ef_construction=200,
                  n_init_epochs=20,
                  n_epochs=50,
                  min_dist=0.4,
                  n_threads=-1,
                  distance='euclidean')
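
A possible continuation of the snippet above (X is a hypothetical NumPy array; its shape is chosen only for illustration):

import numpy as np

X = np.random.random((500, 50))  # illustrative data: 500 samples, 50 features
Y = vis.fit_transform(X)         # Y has shape (500, 2), the 2-D embedding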