Esempi in Python per UMAP.fit, esempi in Python per umap.UMAP.fit

Esempio n. 1

0

Mostra file

def test_umap_bad_n_jobs(nn_data):
    """Fitting must raise ValueError when n_jobs is -2 or 0."""
    for bad_n_jobs in (-2, 0):
        reducer = UMAP(n_jobs=bad_n_jobs)
        with pytest.raises(ValueError):
            reducer.fit(nn_data)

Esempio n. 2

0

Mostra file

def test_densmap_frac(nn_data):
    """With densmap enabled, fitting must raise ValueError for dens_frac -1.0 and 2.0."""
    for bad_frac in (-1.0, 2.0):
        model = UMAP(densmap=True, dens_frac=bad_frac)
        with pytest.raises(ValueError):
            model.fit(nn_data)

Esempio n. 3

0

Mostra file

File: TCGA_SVC.py Progetto: adamritter/MethylNet-1

def run_svc(train_pkl, val_pkl, test_pkl, series=False, outcome_col='Disease_State', num_random_search=0):
    """Embed methylation data with UMAP, fit an SVC-based model and report test metrics.

    Steps performed:

    1. Load three ``MethylationArray`` objects from the given pickles.
    2. Fit a 100-component UMAP on the training betas and replace each
       array's ``beta`` with a DataFrame of its embedding.
    3. Fit a ``MachineLearning`` wrapper around ``SVC`` on train/validation.
    4. Write the fitted object to ``sklearn_model.p`` and the test
       predictions (next to the true ``outcome_col`` values) to
       ``SklearnPredictions.csv``.
    5. Print the score, its standard error and the confidence interval bounds
       returned by ``return_outcome_metric``.

    Args:
        train_pkl: path of the pickled training ``MethylationArray``.
        val_pkl: path of the pickled validation ``MethylationArray``.
        test_pkl: path of the pickled test ``MethylationArray``.
        series: forwarded to ``MachineLearning`` as ``series``.
        outcome_col: phenotype column used as the prediction target.
        num_random_search: forwarded to ``MachineLearning`` as ``n_eval``.

    Returns:
        None. Results are written to disk and printed.
    """
    train_methyl_array = MethylationArray.from_pickle(train_pkl)
    val_methyl_array = MethylationArray.from_pickle(val_pkl)
    test_methyl_array = MethylationArray.from_pickle(test_pkl)

    umap = UMAP(n_components=100)
    umap.fit(train_methyl_array.beta)
    train_methyl_array.beta = pd.DataFrame(umap.transform(train_methyl_array.beta.values),index=train_methyl_array.return_idx())
    val_methyl_array.beta = pd.DataFrame(umap.transform(val_methyl_array.beta),index=val_methyl_array.return_idx())
    test_methyl_array.beta = pd.DataFrame(umap.transform(test_methyl_array.beta),index=test_methyl_array.return_idx())

    # NOTE(review): the options dict carries 'penalty' and 'n_jobs'; whether the
    # MachineLearning wrapper / SVC accepts them cannot be confirmed from here.
    model = SVC
    model = MachineLearning(model,options={'penalty':'l2','verbose':3,'n_jobs':35,'class_weight':'balanced'},grid={'C':[1,10,100,1000], 'gamma':[1,0.1,0.001,0.0001], 'kernel':['linear','rbf']},
                            n_eval=num_random_search,
                            series=series,
                            labelencode=True,
                            verbose=True)

    sklearn_model=model.fit(train_methyl_array,val_methyl_array,outcome_col)
    # Use a context manager so the file handle is closed (and flushed) even if
    # pickling fails part-way through.
    with open('sklearn_model.p','wb') as model_file:
        pickle.dump(sklearn_model,model_file)

    y_pred = model.predict(test_methyl_array)
    pd.DataFrame(np.hstack((y_pred[:,np.newaxis],test_methyl_array.pheno[outcome_col].values[:,np.newaxis])),index=test_methyl_array.return_idx(),columns=['y_pred','y_true']).to_csv('SklearnPredictions.csv')

    original, std_err, (low_ci,high_ci) = model.return_outcome_metric(test_methyl_array, outcome_col, accuracy_score, run_bootstrap=True)

    results={'score':original,'Standard Error':std_err, '0.95 CI Low':low_ci, '0.95 CI High':high_ci}

    print('\n'.join(['{}:{}'.format(k,v) for k,v in results.items()]))

Esempio n. 4

0

Mostra file

File: test_umap_validation_params.py Progetto: ginihumer/latent-projective-interventions

def test_umap_too_many_neighbors_warns(nn_data):
    """Fit with n_neighbors=2000 on 100 rows and check the supplied a/b are kept."""
    reducer = UMAP(a=1.2, b=1.75, n_neighbors=2000, n_epochs=11, init="random")
    first_hundred = nn_data[:100,]
    reducer.fit(first_hundred)
    # The explicitly supplied curve parameters must be stored unchanged.
    assert_equal(reducer._a, 1.2)
    assert_equal(reducer._b, 1.75)

Esempio n. 5

0

Mostra file

def cmp3(Y1, Y2, X1, X2, title=('1', '2'), red=None, save=None):
    """Draw three side-by-side panels: X1, X2, and both stacked together.

    The third panel labels every X1 row as 1 and every X2 row as 2, and maps
    those two values to ``title[0]`` / ``title[1]`` through the ``acc``
    argument of the ``umap`` plotting helper. When ``red`` is falsy a new
    ``UMAP`` is fitted on the stacked data. When ``save`` is truthy the
    figure is written to that path at 300 dpi before being shown.
    """
    sns.set(font_scale=1.2, style='white')
    if not red:
        red = UMAP()
        red.fit(np.vstack((X1, X2)))
    plt.figure(figsize=(24, 8))

    # Options shared by all three panels.
    panel_opts = dict(show=False, size=4, markerscale=4)

    plt.subplot(131)
    umap(X1, Y1, red, title=title[0], **panel_opts)

    plt.subplot(132)
    umap(X2, Y2, red, title=title[1], **panel_opts)

    plt.subplot(133)
    origin_labels = [1] * len(Y1) + [2] * len(Y2)
    legend_names = {1: title[0], 2: title[1]}
    umap(np.vstack((X1, X2)), origin_labels, red,
         title="Combined", acc=legend_names, **panel_opts)

    if save:
        plt.tight_layout()
        plt.savefig(save, dpi=300)
    plt.show()

Esempio n. 6

0

Mostra file

def plot_blobclust(Y1, X1, X2, red=None, save=None):
    """Plot X1 and X2 in a single figure, distinguishing them by marker style.

    ``Y1`` holds the labels for both sets back to back: the first
    ``X1.shape[0]`` entries go with ``X1``, the rest with ``X2``. The two
    ``getmarker`` callbacks passed to the ``umap`` plotting helper return a
    plain ``'o'`` marker spec for X1 and a no-face, coloured-edge spec for X2.
    When ``red`` is falsy a new ``UMAP`` is fitted on the stacked data. When
    ``save`` is truthy the figure is written to that path at 300 dpi before
    being shown.
    """
    sns.set(font_scale=1.2, style='white')
    if not red:
        red = UMAP()
        red.fit(np.vstack((X1, X2)))
    plt.figure(figsize=(12, 12))

    def fill(col):
        # Marker spec for X1; the colour argument is not used.
        return {"marker": 'o'}

    def empty(col):
        # Marker spec for X2: no face colour, edge drawn in the given colour.
        return {'facecolors': 'none', 'edgecolors': col}

    split_at = X1.shape[0]
    shared_opts = dict(show=False,
                       title="combined clustering",
                       size=30,
                       markerscale=4)

    umap(X1, Y1[:split_at], red, getmarker=fill, **shared_opts)
    umap(X2, Y1[split_at:], red, getmarker=empty, **shared_opts)

    if save:
        plt.tight_layout()
        plt.savefig(save, dpi=300)
    plt.show()

Esempio n. 7

0

Mostra file

File: test_umap_validation_params.py Progetto: pifparfait/umap

def test_umap_inverse_transform_fails_expectedly(sparse_spatial_data, nn_data):
    """inverse_transform must raise ValueError in two setups.

    First after fitting on ``sparse_spatial_data``, then after fitting on
    ``nn_data`` with ``metric="dice"``.
    """
    sparse_model = UMAP(n_epochs=11)
    sparse_model.fit(sparse_spatial_data[:100])
    assert_raises(ValueError,
                  sparse_model.inverse_transform,
                  sparse_model.embedding_[:10])

    dice_model = UMAP(metric="dice", n_epochs=11)
    dice_model.fit(nn_data[:100])
    assert_raises(ValueError,
                  dice_model.inverse_transform,
                  dice_model.embedding_[:10])

Esempio n. 8

0

Mostra file

def test_umap_bad_too_large_min_dist(nn_data):
    """Fitting with min_dist=2.0 must raise ValueError."""
    reducer = UMAP(min_dist=2.0)
    # The a,b curve fitting is expected to emit a division-by-zero
    # RuntimeWarning here; it is silenced so only the ValueError matters.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        with pytest.raises(ValueError):
            reducer.fit(nn_data)

Esempio n. 9

0

Mostra file

File: compare.py Progetto: vu-minh/hc-tsne

def run_umap(args):
    """Fit a supervised UMAP and return the train and test embeddings.

    ``X_train``, ``y_train`` and ``X_test`` are not parameters; they are read
    from the enclosing scope. ``args`` supplies ``n_neighbors`` and ``seed``.

    Returns:
        Tuple ``(Z, Z_test)``: the fitted model's ``embedding_`` and the
        result of transforming ``X_test``.
    """
    reducer = UMAP(
        n_neighbors=args.n_neighbors,
        random_state=args.seed,
        init="random",
    )
    reducer.fit(X_train, y=y_train)
    train_embedding = reducer.embedding_
    return train_embedding, reducer.transform(X_test)

Esempio n. 10

0

Mostra file

def test_umap_bad_output_metric_no_grad(nn_data):
    """A custom output_metric that returns only a distance must fail at fit time."""

    @numba.njit()
    def distance_only(x, y):
        # Returns a single value and nothing else.
        return np.sum(np.abs(x - y))

    reducer = UMAP(output_metric=distance_only)
    with pytest.raises(ValueError):
        reducer.fit(nn_data)

Esempio n. 11

0

Mostra file

File: test_umap_validation_params.py Progetto: pifparfait/umap

def test_umap_update_bad_params(nn_data):
    """update() must raise ValueError after a precomputed-metric fit and after a fit with targets."""
    # Case 1: model fitted on a precomputed distance matrix.
    distances = pairwise_distances(nn_data[:100])
    precomputed_model = UMAP(metric="precomputed", n_epochs=11)
    precomputed_model.fit(distances)
    assert_raises(ValueError, precomputed_model.update, distances)

    # Case 2: model fitted with a target array (five labels, 20 rows each).
    supervised_model = UMAP(n_epochs=11)
    targets = np.repeat(np.arange(5), 20)
    supervised_model.fit(nn_data[:100], y=targets)
    assert_raises(ValueError, supervised_model.update, nn_data[100:200])

Esempio n. 12

0

Mostra file

def get_encoder(metas, train_data, target_output_dim):
    """Fit a UMAP model, pickle it into the workspace and wrap it in a UMAPEncoder.

    Args:
        metas: mapping whose ``'workspace'`` entry is the directory to write to.
        train_data: data passed to ``UMAP.fit``.
        target_output_dim: value used as UMAP's ``n_components``.

    Returns:
        A ``UMAPEncoder`` constructed with the path of the pickled model.
    """
    tmpdir = metas['workspace']
    model_path = os.path.join(tmpdir, 'umap_model.model')

    model = UMAP(n_components=target_output_dim, random_state=42)
    model.fit(train_data)
    # Close the file before the encoder is given the path, so the pickle is
    # fully written by the time anything reads it back.
    with open(model_path, 'wb') as model_file:
        pickle.dump(model, model_file)

    return UMAPEncoder(model_path=model_path)

Esempio n. 13

0

Mostra file

File: umap.py Progetto: sfahad1414/whatlies

class Umap:
    """
    This transformer maps all vectors in an [EmbeddingSet][whatlies.embeddingset.EmbeddingSet]
    through umap. We're using the implementation in [umap-learn](https://umap-learn.readthedocs.io/en/latest/).

    Arguments:
        n_components: the number of components to create/add
        kwargs: keyword arguments passed to the UMAP algorithm

    Usage:

    ```python
    from whatlies.language import SpacyLanguage
    from whatlies.transformers import Umap

    words = ["prince", "princess", "nurse", "doctor", "banker", "man", "woman",
             "cousin", "niece", "king", "queen", "dude", "guy", "gal", "fire",
             "dog", "cat", "mouse", "red", "blue", "green", "yellow", "water",
             "person", "family", "brother", "sister"]

    lang = SpacyLanguage("en_core_web_md")
    emb = lang[words]

    emb.transform(Umap(3)).plot_interactive_matrix('umap_0', 'umap_1', 'umap_2')
    ```
    """
    def __init__(self, n_components=2, **kwargs):
        self.n_components = n_components
        self.kwargs = kwargs
        self.tfm = UMAP(n_components=n_components, **kwargs)
        self.is_fitted = False

    def __call__(self, embset):
        """Fit on ``embset`` the first time this is called, then transform it."""
        if self.is_fitted:
            return self.transform(embset)
        self.fit(embset)
        return self.transform(embset)

    def fit(self, embset):
        """Fit the wrapped UMAP on the vectors of ``embset`` and mark it fitted."""
        _, matrix = embset_to_X(embset=embset)
        with warnings.catch_warnings():
            # Both warning categories are silenced for the duration of the fit.
            for noisy_category in (UserWarning, NumbaPerformanceWarning):
                warnings.simplefilter("ignore", category=noisy_category)
            self.tfm.fit(matrix)
        self.is_fitted = True

    def transform(self, embset):
        """Return a new EmbeddingSet holding the projected vectors plus one unit vector per axis.

        The extra entries are named ``umap_0`` ... ``umap_{n_components-1}``
        and are the rows of an identity matrix.
        """
        names, matrix = embset_to_X(embset=embset)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=NumbaPerformanceWarning)
            projected = self.tfm.transform(matrix)
        axis_names = [f"umap_{i}" for i in range(self.n_components)]
        axis_vectors = np.eye(self.n_components)
        embedding_dict = new_embedding_dict(
            names + axis_names,
            np.concatenate([projected, axis_vectors]),
            embset,
        )
        return EmbeddingSet(embedding_dict,
                            name=f"{embset.name}.umap_{self.n_components}()")

Esempio n. 14

0

Mostra file

def _umap_projection(embeddings, n_axes, **kwargs):
    """Project a mapping of embeddings with a default-configured UMAP.

    Args:
        embeddings: mapping from embedding id to vector; the values are
            stacked row-wise in iteration order.
        n_axes: accepted but not used by this function.
        **kwargs: accepted but not used by this function.

    Returns:
        Dict mapping each embedding id to its row of the projected matrix.
    """
    # np.stack needs a real sequence (list/tuple); a dict view is rejected by
    # recent NumPy releases, so materialise the values first.
    embeddings_matrix = np.stack(list(embeddings.values()))
    # NOTE(review): n_axes is never forwarded, so UMAP uses its default
    # n_components -- confirm whether n_components=n_axes was intended.
    umap = UMAP()

    umap.fit(embeddings_matrix)
    projected_matrix = umap.transform(embeddings_matrix)
    # Row i belongs to the i-th key because the matrix was built in key order.
    return {
        embedding_id: projected_matrix[i, :]
        for i, embedding_id in enumerate(embeddings)
    }

Esempio n. 15

0

Mostra file

File: test_umap_validation_params.py Progetto: ginihumer/latent-projective-interventions

def test_umap_fit_instance_returned():
    """fit() must return a UMAP instance, both with and without targets."""
    # Data together with targets.
    model = UMAP()
    data = np.random.uniform(0, 1, (256, 10))
    targets = np.random.randint(10, size=(256,))
    assert isinstance(model.fit(data, targets), UMAP)

    # Data only.
    model = UMAP()
    data = np.random.uniform(0, 1, (256, 10))
    assert isinstance(model.fit(data), UMAP)

Esempio n. 16

0

Mostra file

def get_umap_layout(**kwargs):
    '''Get the x,y positions of images passed through a umap projection

    Keys read from kwargs: 'use_cache', 'n_neighbors', 'min_distance',
    'metric', 'vecs' and, optionally, 'metadata' (an iterable of dicts that
    may each carry a 'label'). All kwargs are also forwarded to get_path and
    write_layout.

    Returns the cached layout path when it exists and 'use_cache' is truthy,
    otherwise whatever write_layout returns for the new embedding.
    '''
    print(' * creating UMAP layout')
    out_path = get_path('layouts', 'umap', **kwargs)
    if os.path.exists(out_path) and kwargs['use_cache']:
        return out_path
    model = UMAP(n_neighbors=kwargs['n_neighbors'],
                 min_dist=kwargs['min_distance'],
                 metric=kwargs['metric'])
    # run PCA to reduce dimensionality of image vectors
    w = PCA(n_components=min(100, len(kwargs['vecs']))).fit_transform(
        kwargs['vecs'])
    # fetch categorical labels for images (if provided)
    y = []
    if kwargs.get('metadata', False):
        labels = [i.get('label', None) for i in kwargs['metadata']]
        # if the user provided labels, integerize them
        if any(labels):
            # each unseen label gets the next free integer (the dict's current size)
            d = defaultdict(lambda: len(d))
            # unlabeled items are encoded as -1
            y = np.array([-1 if label is None else d[label] for label in labels])
    # project the PCA space down to 2d for visualization; targets are only
    # passed when y contains at least one non-zero entry
    z = model.fit(w, y=y if np.any(y) else None).embedding_
    return write_layout(out_path, z, **kwargs)

Esempio n. 17

0

Mostra file

File: manifold.py Progetto: mayalenE/image_representation

class UMAPRepresentation(Representation):
    """Representation that min-max normalises inputs and embeds them with UMAP."""

    @staticmethod
    def default_config():
        """Return the default configuration (UMAP parameters under ``parameters``)."""
        default_config = Dict()

        # parameters
        default_config.parameters = Dict()
        default_config.parameters.n_neighbors = 15
        default_config.parameters.metric = 'euclidean'
        default_config.parameters.init = 'spectral'
        default_config.parameters.random_state = None
        default_config.parameters.min_dist = 0.1

        return default_config

    def __init__(self, n_features=28 * 28, n_latents=10, config=None, **kwargs):
        """Create the representation.

        Args:
            n_features: flattened input size.
            n_latents: latent size, used as UMAP's ``n_components``.
            config: configuration passed to ``Representation.__init__``;
                ``None`` (the default) is replaced by a new empty dict.
            **kwargs: forwarded to ``Representation.__init__``.
        """
        # A new dict per call: a shared ``{}`` default would be one object
        # reused by every instance created without an explicit config.
        if config is None:
            config = {}
        Representation.__init__(self, config=config, **kwargs)

        # input size (flatten)
        self.n_features = n_features
        # latent size
        self.n_latents = n_latents
        # feature range
        self.feature_range = (0.0, 1.0)

        self.algorithm = UMAP()
        self.update_algorithm_parameters()

    def fit(self, X_train, update_range=True):
        '''
        Normalise X_train with the stored feature range and fit UMAP on it.

        X_train: array-like (n_samples, n_features)
        update_range: when True, the per-feature (min, max) of X_train
            replaces the stored feature range before normalising.
        '''
        X_train = np.nan_to_num(X_train)
        if update_range:
            self.feature_range = (X_train.min(axis=0), X_train.max(axis=0))  # save (min, max) for normalization
        # NOTE(review): a feature whose max equals its min makes the
        # denominator zero here -- confirm inputs never contain constant features.
        X_train = (X_train - self.feature_range[0]) / (self.feature_range[1] - self.feature_range[0])
        self.algorithm.fit(X_train)

    def calc_embedding(self, x):
        """Normalise ``x`` with the stored feature range and return its UMAP transform."""
        x = (x - self.feature_range[0]) / (self.feature_range[1] - self.feature_range[0])
        x = self.algorithm.transform(x)
        return x

    def update_algorithm_parameters(self):
        """Push ``n_latents`` and ``config.parameters`` into the UMAP instance."""
        self.algorithm.set_params(n_components=self.n_latents, **self.config.parameters, verbose=False)

Esempio n. 18

0

Mostra file

def test_umap_custom_distance_w_grad(nn_data):
    """Check UserWarning counts when fitting with two custom metrics.

    ``dist1`` returns only a distance; ``dist2`` returns a distance and a
    second value ``x - y``. Both fits must emit at least one UserWarning
    (required by ``pytest.warns``); the second must emit at most one.
    """

    @numba.njit()
    def dist1(x, y):
        return np.sum(np.abs(x - y))

    @numba.njit()
    def dist2(x, y):
        return np.sum(np.abs(x - y)), (x - y)

    plain_model = UMAP(metric=dist1, n_epochs=11)
    with pytest.warns(UserWarning) as recorded:
        plain_model.fit(nn_data[:10])
    assert len(recorded) >= 1

    grad_model = UMAP(metric=dist2, n_epochs=11)
    with pytest.warns(UserWarning) as recorded:
        grad_model.fit(nn_data[:10])
    assert len(recorded) <= 1

Esempio n. 19

0

Mostra file

def test_ingest_map_embedding_umap():
    """Ingest's UMAP mapping of T must match transforming T with a UMAP fitted on X."""
    reference = sc.AnnData(X)
    query = sc.AnnData(T)

    neighbor_settings = dict(method='umap',
                             use_rep='X',
                             n_neighbors=4,
                             random_state=0)
    sc.pp.neighbors(reference, **neighbor_settings)
    sc.tl.umap(reference, random_state=0)

    ingest = sc.tl.Ingest(reference)
    ingest.fit(query)
    ingest.map_embedding(method='umap')

    # Reference result computed directly with UMAP, using matching settings.
    direct = UMAP(min_dist=0.5, random_state=0, n_neighbors=4)
    direct.fit(X)
    expected = direct.transform(T)

    assert np.allclose(ingest._obsm['X_umap'], expected)

Esempio n. 20

0

Mostra file

File: umap_playground.py Progetto: sirbiscuit/outerspace

    def run_transformation(self, X, y, transformation_params, callback):
        """Fit UMAP on ``(X, y)``, reporting progress and failures through ``callback``.

        ``callback`` is invoked as ``callback(kind, iteration, payload)`` with
        kind ``'start'``, ``'status'``, ``'embedding'`` or ``'error'``.
        Exceptions raised during the run are not propagated; their message is
        delivered as an ``'error'`` event.
        """
        class CallbackAdapter:
            # Turns a call with (iteration, embedding) into the three-argument
            # event form expected by ``callback``.
            def __init__(self, callback):
                self.callback = callback

            def __call__(self, iteration, embedding):
                payload = {'embedding': embedding}
                self.callback('embedding', iteration, payload)

        umap = UMAP(callback=CallbackAdapter(callback), **transformation_params)
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=NumbaWarning)
            try:
                callback('start', 0, None)
                callback('status', 0, {'message': 'Initializing UMAP'})
                umap.fit(X, y)
            except Exception as exc:
                # Best effort: report the failure to the caller's callback.
                callback('error', 0, {'message': str(exc)})

Esempio n. 21

0

Mostra file

def test_umap_bad_metrics(nn_data):
    """Fitting must raise ValueError for an unknown name or a float as metric/output_metric."""
    invalid_settings = (
        {"metric": "foobar"},
        {"metric": 2.75},
        {"output_metric": "foobar"},
        {"output_metric": 2.75},
    )
    for settings in invalid_settings:
        reducer = UMAP(**settings)
        with pytest.raises(ValueError):
            reducer.fit(nn_data)

Esempio n. 22

0

Mostra file

def plot_umap(X,
              Y,
              validation_data=None,
              style='starplot',
              p_size=3.5,
              save_img=False,
              img_res=300,
              fig_res=72,
              random_state=None):
    """Fit a supervised UMAP on ``(X, Y)`` and scatter-plot the embedded validation data.

    Args:
        X: training features passed to ``UMAP.fit``.
        Y: training targets; ``Y.ravel()`` is passed to ``UMAP.fit``.
        validation_data: ``(X_valid, Y_valid)`` pair to embed and colour by;
            defaults to ``(X, Y)``.
        style: ``'starplot'`` switches to a dark background and sets figure
            size, font and dpi through ``plt.rcParams``; any other value
            leaves the current matplotlib settings untouched.
        p_size: exponent controlling marker size and transparency per layer.
        save_img: when true, save to ``./plots/s<int(p_size)>_umap.png``.
        img_res: dpi used when saving.
        fig_res: figure dpi applied in the ``'starplot'`` style.
        random_state: forwarded to ``UMAP``.

    Returns:
        None. The figure is shown (and optionally saved).
    """
    from umap import UMAP

    if validation_data is None:
        validation_data = (X, Y)
    X_valid, Y_valid = validation_data
    if style == 'starplot':
        plt.style.use(['dark_background'])
        plt.rcParams['figure.figsize'] = (15, 15)
        plt.rcParams['font.family'] = 'sans-serif'
        plt.rcParams['font.size'] = 14
        plt.rcParams['figure.dpi'] = fig_res
    # The first positional argument of UMAP is passed as 25 here.
    umap = UMAP(25, random_state=random_state)
    umap.fit(X, Y.ravel())
    embedings = umap.transform(X_valid)
    embedings = np.array(embedings)
    size = p_size
    cmap = LinearSegmentedColormap.from_list("recy", ["magenta", "cyan"])
    # Nine overlaid layers: markers grow while opacity shrinks with each layer.
    for point in range(1, 10):
        plt.scatter(
            embedings[:, 0],
            embedings[:, 1],
            c=Y_valid.ravel(),
            cmap=cmap,
            s=5 * point**size,
            alpha=1 / (point**size),
            # 'none' is matplotlib's spelling for "draw no edge"; an empty
            # string is not a valid colour value.
            edgecolors='none',
        )
    file_name = './plots/s' + str(int(size)) + '_umap.png'
    if save_img:
        os.makedirs(os.path.dirname(file_name), exist_ok=True)
        plt.savefig(file_name, dpi=img_res, transparent=True)
    plt.show()

Esempio n. 23

0

Mostra file

File: utils.py Progetto: sedelmeyer/nyc-capital-projects

def draw_umap(
    data,
    n_neighbors=15,
    min_dist=0.1,
    c=None,
    n_components=2,
    metric="euclidean",
    title="",
    plot=True,
    cmap=None,
    use_plotly=False,
    **kwargs,
):
    """Generate plot of UMAP algorithm results based on specified arguments

    Args:
        data: data to fit and transform.
        n_neighbors, min_dist, n_components, metric: forwarded to ``UMAP``.
        c: colour values for the points.
        title: plot title.
        plot: when False, nothing is drawn.
        cmap: colormap, used only by the 2-component matplotlib plot.
        use_plotly: draw with ``px.scatter`` (first two components) instead
            of matplotlib.
        **kwargs: forwarded to ``px.scatter`` only.

    Returns:
        Tuple ``(u, mapper)``: the transformed data and the object returned
        by ``UMAP.fit``.
    """
    fit = UMAP(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        metric=metric,
        random_state=42,
    )
    mapper = fit.fit(data)
    u = fit.transform(data)
    if plot:
        if use_plotly:
            fig = px.scatter(
                x=u[:, 0], y=u[:, 1], color=c, title=title, **kwargs
            )
            fig.update_layout(
                {
                    "plot_bgcolor": "rgba(0, 0, 0, 0)",
                    "paper_bgcolor": "rgba(0, 0, 0, 0)",
                }
            )
            fig.show()
        else:
            fig = plt.figure()
            # Stay None when n_components is not 1, 2 or 3, so the legend
            # step below can be skipped instead of failing on an unbound name.
            ax = None
            scatter = None
            if n_components == 1:
                ax = fig.add_subplot(111)
                scatter = ax.scatter(u[:, 0], range(len(u)), c=c)
            elif n_components == 2:
                ax = fig.add_subplot(111)
                scatter = ax.scatter(u[:, 0], u[:, 1], c=c, label=c, cmap=cmap)
            elif n_components == 3:
                ax = fig.add_subplot(111, projection="3d")
                scatter = ax.scatter(u[:, 0], u[:, 1], u[:, 2], c=c, s=100)
            plt.title(title, fontsize=18)
            if scatter is not None:
                legend = ax.legend(*scatter.legend_elements())
                ax.add_artist(legend)

    return u, mapper

Esempio n. 24

0

Mostra file

File: graph_embedder.py Progetto: fabriziocosta/EGO

class UmapTransformer(object):
    """UMAP embedding whose ``transform`` is served by a random forest regressor.

    ``fit`` fits UMAP, embeds the training data with it and then trains a
    ``RandomForestRegressor`` to map the original features to that embedding.
    ``transform`` returns the forest's predictions.
    """

    def __init__(self,
                 n_components=2,
                 embed_n_neighbors=10,
                 target_metric='categorical'):
        self.umap = UMAP(n_components=n_components,
                         n_neighbors=embed_n_neighbors,
                         target_metric=target_metric,
                         transform_seed=1)
        self.rf = RandomForestRegressor(n_estimators=300)

    def fit(self, x, y=None):
        """Fit UMAP on ``(x, y)`` and the forest on ``x -> UMAP embedding``; return self."""
        self.umap.fit(x, y)
        x_low = self.umap.transform(x)
        self.rf.fit(x, x_low)
        return self

    def transform(self, x):
        """Return the random forest's predicted embedding for ``x``."""
        return self.rf.predict(x)

    def fit_transform(self, x, y=None):
        """Fit on ``(x, y)`` and return ``transform(x)``.

        ``y`` is optional here as well, so this method accepts the same
        arguments as ``fit``.
        """
        return self.fit(x, y).transform(x)

Esempio n. 25

0

Mostra file

File: ensemble.py Progetto: ueda-hiroyuki/machine_learning

def run_dimention_reduction(train_x, test_x, train_y):
    """
    Reduce dimensionality in two stages (PCA, then UMAP) and standardize.

    Both models are fitted on the training frame only and then applied to
    the train and test frames. ``train_y`` is accepted but not used.
    Returns the two reduced frames with columns ``umap_1`` and ``umap_2``.
    """
    # Stage 1: PCA down to half the original number of columns.
    half_width = round(len(train_x.columns) * 0.5)
    pca = PCA(n_components=half_width).fit(train_x)
    pca_train = pd.DataFrame(pca.transform(train_x))
    pca_test = pd.DataFrame(pca.transform(test_x))

    # Stage 2: UMAP on the PCA output (two output columns are named below).
    reducer = UMAP(random_state=0)
    reducer.fit(pca_train)
    umap_train = pd.DataFrame(reducer.transform(pca_train))
    umap_test = pd.DataFrame(reducer.transform(pca_test))

    # Standardize
    umap_train = cf.standardize(umap_train)
    umap_test = cf.standardize(umap_test)

    for frame in (umap_train, umap_test):
        frame.columns = ["umap_1", "umap_2"]
    return umap_train, umap_test

Esempio n. 26

0

Mostra file

def predict_adata(model,
                  adata,
                  make_umap=True,
                  umap_fit_n=10000,
                  batch_size=1024):
    """Run ``model`` over ``adata`` and store embeddings and predictions on it.

    Args:
        model: object exposing ``genes`` and ``classes``; passed to
            ``predict_dl``.
        adata: annotated data to predict on.
        make_umap: when true, fit a UMAP on a random subset of the
            embeddings and store the transform of all of them in
            ``obsm['X_umap']``.
        umap_fit_n: maximum number of rows sampled (without replacement)
            for the UMAP fit.
        batch_size: batch size handed to ``get_prediction_dataloader``.

    Returns:
        The dataloader's ``dataset.adata`` with ``obsm['X_emb']``,
        ``obsm['prediction_probs']`` (and optionally ``obsm['X_umap']``)
        set, and ``obs`` extended with ``y_pred``,
        ``predicted_cell_type_probability``, ``predicted_cell_type`` and one
        ``probability <class>`` column per class.
    """
    # Honour the caller's batch_size instead of a hard-coded value.
    dl = get_prediction_dataloader(adata, model.genes, batch_size=batch_size)
    logging.info(f'starting prediction of {dl.dataset.adata.shape[0]} cells')
    emb, y_prob = predict_dl(dl, model)
    a = dl.dataset.adata
    a.obsm['X_emb'] = emb

    if make_umap:
        u = UMAP()
        # Fit on at most umap_fit_n randomly chosen rows, then embed all rows.
        idxs = np.random.choice(np.arange(a.shape[0]),
                                size=min(umap_fit_n, a.shape[0]),
                                replace=False)
        u.fit(emb[idxs])
        a.obsm['X_umap'] = u.transform(emb)

    a.obsm['prediction_probs'] = y_prob

    a.obs['y_pred'] = [np.argmax(probs) for probs in y_prob]
    a.obs['predicted_cell_type_probability'] = [
        np.max(probs) for probs in y_prob
    ]
    a.obs['predicted_cell_type'] = [
        model.classes[np.argmax(probs)] for probs in y_prob
    ]

    prob_df = pd.DataFrame(data=a.obsm['prediction_probs'],
                           columns=model.classes,
                           index=a.obs.index.to_list())
    prob_df.columns = [f'probability {c}' for c in prob_df.columns]

    a.obs = pd.concat((a.obs, prob_df), axis=1)

    return a

Esempio n. 27

0

Mostra file

def umap_reduce(docvecs, label, umap_model, use_nn, use_umap, **kwargs):
    """Optionally reduce ``docvecs`` with UMAP, training a model when none is given.

    Args:
        docvecs: document vectors.
        label: targets passed as ``y`` when a new model is trained.
        umap_model: existing model to reuse; a falsy value triggers training.
        use_nn: selects ``min(256, len(docvecs) - 2)`` components when
            truthy, otherwise a single component; also decides whether the
            result is cast to float (only when falsy).
        use_umap: when falsy, the vectors are returned as an array untouched.
        **kwargs: accepted but not used.

    Returns:
        Tuple ``(vectors, model)``; ``model`` is ``None`` when ``use_umap``
        is falsy.
    """
    if not use_umap:
        return np.array(docvecs), None

    if not umap_model:
        print("Train UMAP...")
        if use_nn:
            n_dims = min(256, len(docvecs) - 2)
        else:
            n_dims = 1
        untrained = UMAP(metric="cosine", set_op_mix_ratio=1.0,
                         n_components=n_dims, random_state=42,
                         verbose=False)
        umap_model = untrained.fit(docvecs, y=label)

    reduced = umap_model.transform(docvecs)
    if use_nn:
        return reduced, umap_model
    return reduced.astype(float), umap_model

Esempio n. 28

0

Mostra file

File: dimensionality_reduction.py Progetto: ueda-hiroyuki/machine_learning

def run_dimention_reduction(train_x, test_x, train_y):
    """
    Reduce dimensionality in two stages (PCA, then UMAP).

    Both models are fitted on the training frame only and then applied to
    the train and test frames. ``train_y`` is accepted but not used.
    Returns the two reduced frames with columns ``umap_1`` and ``umap_2``.
    """
    # Stage 1: PCA down to half the original number of columns.
    half_width = round(len(train_x.columns) * 0.5)
    pca = PCA(n_components=half_width).fit(train_x)
    pca_train = pd.DataFrame(pca.transform(train_x))
    pca_test = pd.DataFrame(pca.transform(test_x))

    # Stage 2: UMAP on the PCA output (two output columns are named below).
    reducer = UMAP(random_state=0)
    reducer.fit(pca_train)
    umap_train = pd.DataFrame(reducer.transform(pca_train))
    umap_test = pd.DataFrame(reducer.transform(pca_test))

    for frame in (umap_train, umap_test):
        frame.columns = ["umap_1", "umap_2"]

    # A disabled debugging snippet used to live here: it scatter-plotted the
    # reduced training data coloured by train_y and saved the figure.
    return umap_train, umap_test

Esempio n. 29

0

Mostra file

File: generative.py Progetto: neurospin/pynet

 def get_similarity_matrix(data,
                           n_components_umap=2,
                           n_neighbors_knn=10,
                           random_state=None):
     """ Build a similarity matrix from a UMAP embedding of the data.

     The similarity matrix is derived in an unsupervised way (here: a UMAP
     projection of the flattened data followed by a k-nearest-neighbors
     graph that defines the adjacency matrix for the batch). The same
     mechanism can also carry weakly-supervised information (e.g., knowledge
     about diseased vs. non-diseased patients), and if labels are available
     the model could even be used to derive a latent representation with
     supervision. The similarity feature in MoE-Sim-VAE thus allows
     including prior knowledge about the best similarity measure on the data.

     Returns the dense float32 neighbor graph and the UMAP embedding.
     """
     n_samples = len(data)
     flattened = data.reshape(n_samples, -1)

     reducer = UMAP(n_components=n_components_umap,
                    random_state=random_state)
     reducer.fit(flattened)
     embedding = reducer.transform(flattened)

     knn = NearestNeighbors(n_neighbors=n_neighbors_knn)
     knn.fit(embedding)
     adjacency = knn.kneighbors_graph(embedding).toarray()
     return adjacency.astype(np.float32), embedding

Esempio n. 30

0

Mostra file

File: reduction.py Progetto: postBG/hidden-stratification

class UMAPReducer(Reducer):
    """
    Thin wrapper around UMAP, provided for API consistency.

    Defaults ``n_neighbors`` to 10 and ``min_dist`` to 0. unless the caller
    overrides them through keyword arguments.
    """
    def __init__(self, n_components=2, **kwargs):
        self.n_components = n_components
        settings = {'n_neighbors': 10, 'min_dist': 0.}
        settings.update(kwargs)  # caller-supplied values win over the defaults
        self.model = UMAP(n_components=n_components, **settings)

    def fit(self, X):
        """Fit the wrapped model on ``X`` with NumbaWarning silenced; return self."""
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', NumbaWarning)
            self.model.fit(X)
        return self

    def transform(self, X):
        """Return the wrapped model's transform of ``X`` with NumbaWarning silenced."""
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', NumbaWarning)
            return self.model.transform(X)

    def decrement_components(self):
        """Lower ``n_components`` by one on both this wrapper and the wrapped model."""
        for holder in (self, self.model):
            holder.n_components -= 1