Esempio n. 1
0
def dimensionality_reduction(
        sample: pd.Series,
        background_df: pd.DataFrame,
        genes: List[str],
        col: str,
        method='trimap') -> Tuple[pd.DataFrame, hv.Scatter]:
    """
    Embed the background samples plus one n-of-1 sample in 2-D and build a scatter plot.

    The sample is appended as the last row, so it is the last point of the
    embedding. It gets the label ``N-of-1 - <sample[col]>`` and size 5, while
    every background row keeps its own ``col`` value and size 1.

    Args:
        sample: n-of-1 sample. Gets own label
        background_df: Background dataset
        genes: Genes (column names) to use in dimensionality reduction
        col: Column to use for color_index
        method: Method of dimensionality reduction. `trimap` or `tsne`

    Returns:
        Tuple of (DataFrame with columns ``x``, ``y``, ``col`` and ``size``,
        Holoviews Scatter object built from that DataFrame with ``x`` as kdim
        and the remaining columns as vdims)

    Raises:
        AssertionError: if `method` is neither `trimap` nor `tsne`
    """
    # Explicit check instead of `assert` so the validation survives `python -O`;
    # the exception type is kept for existing callers.
    if method not in ('trimap', 'tsne'):
        raise AssertionError('`method` must be either `trimap` or `tsne`')

    # DataFrame.append was removed in pandas 2.0. Build the one-row frame the
    # same way append used to (transpose + infer_objects keeps numeric dtypes).
    sample_row = sample.to_frame().T.infer_objects()
    combined = pd.concat([background_df, sample_row])

    if method == 'trimap':
        reduced = trimap.TRIMAP().fit_transform(combined[genes])
    else:
        reduced = t_sne.TSNE().fit_transform(combined[genes])

    df = pd.DataFrame(reduced, columns=['x', 'y'])
    df[col] = background_df[col].tolist() + [f'N-of-1 - {sample[col]}']
    df['size'] = [1] * len(background_df) + [5]
    return df, hv.Scatter(data=df, kdims=['x'], vdims=['y', col, 'size'])
def calculateTestResults():
    """
    Score every embedding pair and append the scores to the global `results`.

    Each item of the global `embeddings` is indexed as (digits embedding,
    fashion embedding). For both, four scores are computed in this order:
    silhouette, Davies-Bouldin, TriMap global score and trustworthiness.
    One 4-element list per embedding is appended to `results`.
    """

    def _score_embedding(low_dim, labels, high_dim):
        # Same metric order as the rows stored in `results`.
        row = [
            metrics.silhouette_score(low_dim, labels),
            metrics.davies_bouldin_score(low_dim, labels),
            tri.TRIMAP(verbose=False).global_score(high_dim, low_dim),
            manifold.trustworthiness(high_dim, low_dim),
        ]
        results.append(row)
        print("Processed a result")

    for pair in pb.progressbar(iter(embeddings), redirect_stdout=True):
        _score_embedding(pair[0], digits_labels, digits_hd_data)
        _score_embedding(pair[1], fashion_labels, fashion_hd_data)
def run_DR_Algorithm(name, data_features):
    """
    Runs the requested DR algorithm and returns the embedding.

    Parameters:
    -----------------
    name : String, name of algorithm. One of "UMAP", "tSNE", "PCA",
        "Trimap", "M_Core_tSNE", "MDS", "Isomap", "KernelPCA"
    data_features : nD array, original features

    Returns:
    -----------------
    points : nD array
        embedding

    Raises:
    -----------------
    ValueError
        if `name` is not one of the supported algorithm names
    """
    int_dim = 2

    if name == "UMAP":
        return umap.UMAP(n_neighbors=15, n_components=int_dim).fit_transform(data_features)

    if name == "tSNE":
        return TSNE(n_components=int_dim, perplexity=30).fit_transform(data_features)

    if name == "PCA":
        return PCA(n_components=int_dim).fit_transform(data_features)

    if name == "Trimap":
        # TRIMAP is used with its own defaults; int_dim is not passed here.
        return trimap.TRIMAP().fit_transform(data_features)

    if name == "M_Core_tSNE":
        return M_TSNE(n_components=int_dim, perplexity=30, n_jobs=8).fit_transform(data_features)

    if name == "MDS":
        return MDS(n_components=int_dim).fit_transform(data_features)

    if name == "Isomap":
        return Isomap(n_components=int_dim).fit_transform(data_features)

    if name == "KernelPCA":
        return KernelPCA(n_components=int_dim).fit_transform(data_features)

    # Previously an unknown name reached `return points` with `points` unbound
    # and failed with an opaque UnboundLocalError.
    raise ValueError(f"Unknown DR algorithm: {name!r}")
Esempio n. 4
0
def experiment(X, method='PaCMAP', **kwargs):
    """
    Fit one dimensionality-reduction method on `X` and time the fit.

    Args:
        X: data passed to the transformer's `fit_transform`
        method: 'PaCMAP', 'UMAP' or 'TriMAP'
        **kwargs: forwarded unchanged to the chosen transformer's constructor

    Returns:
        Tuple (X_low, total_time): the embedding and the wall-clock seconds
        spent inside `fit_transform`.

    Raises:
        AssertionError: if `method` is not one of the supported names
    """
    if method == 'PaCMAP':
        transformer = PaCMAP(**kwargs)
    elif method == 'UMAP':
        transformer = umap.UMAP(**kwargs)
    elif method == 'TriMAP':
        transformer = trimap.TRIMAP(**kwargs)
    else:
        print("Incorrect method specified")
        # Explicit raise instead of `assert (False)`: asserts are stripped
        # under `python -O`, which let execution continue with `transformer`
        # unbound. The exception type is kept for existing callers.
        raise AssertionError("Incorrect method specified")
    start_time = time()
    X_low = transformer.fit_transform(X)
    total_time = time() - start_time
    return X_low, total_time
Esempio n. 5
0
def embedding(X, args):
    """
    Normalize `X`, optionally reduce it to 20 PCA components, and embed it.

    The embedding method is read from `args.method`: "song", "tsne",
    "trimap", or anything else for UMAP. Preprocess and embedding wall-clock
    times are printed before the embedding is returned.

    NOTE(review): the "song" branch reads `n_max_epoch`, `b` and `Y`, which
    are not parameters of this function — presumably module-level names;
    confirm they are defined where this is called.
    """
    print("... preprocess: normalization and PCA")
    method = args.method
    t_prep = time.time()
    X = normalize(X)
    # Only wide inputs are reduced; narrower ones are embedded as-is.
    if X.shape[1] > 20:
        X = PCA(n_components=20).fit_transform(X)
    preprocess_interval = time.time() - t_prep

    print("... embedding")
    t_embed = time.time()
    if method == "song":
        import song
        song_model = song.song_.SONG(n_max_epoch=n_max_epoch, b=b)
        song_model.fit(X, Y)
        points = song_model.raw_embeddings[:, :]
    else:
        if method == "tsne":
            reducer = TSNE(n_components=2, random_state=42)
        elif method == "trimap":
            reducer = trimap.TRIMAP(n_iters=500)
        else:
            reducer = umap.UMAP()
        points = reducer.fit_transform(X)
    embedding_interval = time.time() - t_embed

    print("Preprocess time\t{}\n".format(preprocess_interval))
    print("Embedding time\t{}\n".format(embedding_interval))
    return points
Esempio n. 6
0
def trimap_fromR(data, n_dims, n_inliers, n_outliers, n_random, distance, lr,
                 n_iters, knn_tuple, apply_pca, opt_method, verbose,
                 weight_adj, return_seq):
    """
    Run TriMap on `data` with explicitly coerced arguments and capture stdout.

    Every option is cast to the Python type shown in the call below before it
    is handed to `trimap.TRIMAP`. `knn_tuple` is accepted but reset to None
    and never passed on.

    Returns:
        A two-element list `[output, embedding]`: the lines printed to stdout
        while fitting (a list subclass) and the result of `fit_transform`.
    """
    import trimap
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    import sys

    class Capturing(list):
        """List that fills itself with the stdout lines printed inside `with`."""

        def __enter__(self):
            self._stdout = sys.stdout
            self._stringio = StringIO()
            sys.stdout = self._stringio
            return self

        def __exit__(self, *args):
            captured_text = self._stringio.getvalue()
            self.extend(captured_text.splitlines())
            del self._stringio  # free up some memory
            sys.stdout = self._stdout

    knn_tuple = None
    with Capturing() as output:
        options = dict(
            n_dims=int(n_dims),
            n_inliers=int(n_inliers),
            n_outliers=int(n_outliers),
            n_random=int(n_random),
            distance=str(distance),
            lr=float(lr),
            n_iters=int(n_iters),
            apply_pca=bool(apply_pca),
            opt_method=str(opt_method),
            verbose=bool(verbose),
            weight_adj=float(weight_adj),
            return_seq=bool(return_seq),
        )
        reducer = trimap.TRIMAP(**options)
        embedding = reducer.fit_transform(data)
    return [output, embedding]
Esempio n. 7
0
def plot(input_path, resample, output_path, feature, method, limit_length, n_max_epoch, b):
    """
    Load a feature set, embed it in 2-D, save plot/embedding and evaluate it.

    Steps: load `data_x.<feature>.npy` / `data_y.<feature>.npy` from
    `input_path` (3-D inputs are first turned into sliding windows using
    `data_s.<feature>.npy`), optionally keep a random subset of `resample`
    rows, normalize and PCA-reduce to 20 components when wider, embed with
    `method` ("song", "tsne", "trimap", anything else = UMAP), then write
    `<method>.<feature>.npy/.png/.txt` into `output_path`. The text file holds
    preprocess time, embedding time and 5-fold kNN (k=10) accuracy on the
    embedding.
    """
    data_prefix = input_path + "/data_"
    npy_suffix = "." + feature + ".npy"
    x = np.load(data_prefix + "x" + npy_suffix)
    y = np.load(data_prefix + "y" + npy_suffix)
    print("X (input file):", x.shape)

    # 3-D input is treated as sequences and flattened into sliding windows.
    if len(x.shape) == 3:
        print("... generating sliding window")
        s = np.load(data_prefix + "s" + npy_suffix)
        x, y = make_sliding_window(x, y, s, window=10, limit_length=limit_length)
        print("X (sliding window):", x.shape)

    if resample is None:
        X, Y = x, y
    else:
        print("... resampling")
        order = list(range(x.shape[0]))
        np.random.shuffle(order)
        picked = order[:resample]
        X = x[picked, :]
        Y = y[picked]

    print("... preprocess: normalization and PCA")
    t_prep = time.time()
    X = normalize(X)
    if X.shape[1] > 20:
        X = PCA(n_components=20).fit_transform(X)
    preprocess_interval = time.time() - t_prep

    print("... embedding")
    t_embed = time.time()
    if method == "song":
        import song
        song_model = song.song_.SONG(n_max_epoch=n_max_epoch, b=b)
        song_model.fit(X, Y)
        embedding = song_model.raw_embeddings[:, :]
    else:
        if method == "tsne":
            reducer = TSNE(n_components=2, random_state=42)
        elif method == "trimap":
            reducer = trimap.TRIMAP(n_iters=500)
        else:
            reducer = umap.UMAP()
        embedding = reducer.fit_transform(X)
    embedding_interval = time.time() - t_embed

    print("... plotting embedded points")
    os.makedirs(output_path, exist_ok=True)
    # All three outputs share the same "<output_path>/<method>.<feature>" stem.
    out_stem = output_path + "/" + method + "." + feature
    np.save(out_stem + ".npy", embedding)
    print(method, "time:", embedding_interval)
    plot_scatter(embedding, Y, out_stem + ".png", method)

    print("... evaluation")
    knn = KNeighborsClassifier(n_neighbors=10)
    pred_y = cross_val_predict(knn, embedding, Y, cv=5)
    acc = sklearn.metrics.accuracy_score(Y, pred_y)
    print("accuracy:", acc)

    report_rows = (
        ("Preprocess time", preprocess_interval),
        ("Embedding time", embedding_interval),
        ("Accuracy", acc),
    )
    with open(out_stem + ".txt", "w") as fp:
        for label, value in report_rows:
            fp.write("{}\t{}\n".format(label, value))
def run_reduce_dim(
    adata,
    X_data,
    n_components,
    n_pca_components,
    reduction_method,
    embedding_key,
    n_neighbors,
    neighbor_key,
    cores,
    kwargs,
):
    """
    Embed `X_data` with the chosen method and store the result on `adata`.

    Args:
        adata: annotated data object; `obsm`, `uns` and (umap only) `obsp`
            are written to.
        X_data: matrix handed to the reducer.
        n_components: output dimensionality (used by "umap" and "psl").
        n_pca_components: only recorded in ``adata.uns["umap_fit"]``.
        reduction_method: "trimap", "diffusion_map" (currently a no-op),
            "tsne" (case-insensitive), "umap" or "psl".
        embedding_key: key in ``adata.obsm`` that receives the embedding.
        n_neighbors: neighbor count recorded in the params and used by
            "umap" / "psl".
        neighbor_key: key in ``adata.uns`` that receives the neighbor info.
        cores: thread count passed to FItSNE.
        kwargs: overrides merged into the default UMAP arguments via
            `update_dict`.

    Returns:
        The same `adata` object, updated in place.

    Raises:
        ImportError: "tsne" was requested but the `fitsne` package is missing.
        Exception: `reduction_method` is not supported.
    """
    if reduction_method == "trimap":
        import trimap

        triplemap = trimap.TRIMAP(
            n_inliers=20,
            n_outliers=10,
            n_random=10,
            distance="euclidean",  # cosine
            weight_adj=1000.0,
            apply_pca=False,
        )
        X_dim = triplemap.fit_transform(X_data)

        adata.obsm[embedding_key] = X_dim
        adata.uns[neighbor_key] = {
            "params": {
                "n_neighbors": n_neighbors,
                "method": reduction_method
            },
        }
    elif reduction_method == "diffusion_map":
        pass
    elif reduction_method.lower() == "tsne":
        try:
            from fitsne import FItSNE
        except ImportError as err:
            # Previously this only printed the hint and then crashed on the
            # unbound `FItSNE` name; fail with the actionable message instead.
            raise ImportError(
                "Please first install fitsne to perform accelerated tSNE method. Install instruction is "
                "provided here: https://pypi.org/project/fitsne/") from err

        X_dim = FItSNE(X_data, nthreads=cores)  # use FitSNE

        adata.obsm[embedding_key] = X_dim
        adata.uns[neighbor_key] = {
            "params": {
                "n_neighbors": n_neighbors,
                "method": reduction_method
            },
        }
    elif reduction_method == "umap":
        _umap_kwargs = {
            "n_components": n_components,
            "metric": "euclidean",
            "min_dist": 0.5,
            "spread": 1.0,
            "n_epochs": 0,
            "alpha": 1.0,
            "gamma": 1.0,
            "negative_sample_rate": 5,
            "init_pos": "spectral",
            "random_state": 0,
            "densmap": False,
            "dens_lambda": 2.0,
            "dens_frac": 0.3,
            "dens_var_shift": 0.1,
            "output_dens": False,
            "verbose": False,
        }
        umap_kwargs = update_dict(_umap_kwargs, kwargs)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            (
                mapper,
                graph,
                knn_indices,
                knn_dists,
                X_dim,
            ) = umap_conn_indices_dist_embedding(X_data, n_neighbors,
                                                 **umap_kwargs)  # X

        adata.obsm[embedding_key] = X_dim
        knn_dists = knn_to_adj(knn_indices, knn_dists)
        adata.uns[neighbor_key] = {
            "params": {
                "n_neighbors": n_neighbors,
                "method": reduction_method
            },
            "indices": knn_indices,
        }

        # A neighbor_key such as "<layer>_neighbors" namespaces the graph keys
        # with the part before the first underscore.
        layer = neighbor_key.split("_")[0] if "_" in neighbor_key else None
        conn_key = "connectivities" if layer is None else layer + "_connectivities"
        dist_key = "distances" if layer is None else layer + "_distances"

        adata.obsp[conn_key], adata.obsp[dist_key] = graph, knn_dists

        adata.uns["umap_fit"] = {
            "fit": mapper,
            "n_pca_components": n_pca_components
        }
    elif reduction_method == "psl":
        adj_mat, X_dim = psl(X_data, d=n_components,
                             K=n_neighbors)  # this need to be updated
        adata.obsm[embedding_key] = X_dim
        adata.uns[neighbor_key] = adj_mat

    else:
        raise Exception(
            "reduction_method {} is not supported.".format(reduction_method))

    return adata
Esempio n. 9
0
import trimap
import numpy
from sklearn.datasets import fetch_openml

# Embed Fashion-MNIST with TriMap and dump the result as text: a
# "<rows> <cols> " header line followed by one embedded point per line.
mnist = fetch_openml(name="Fashion-MNIST")

embedding = trimap.TRIMAP().fit_transform(mnist['data'])

# Write header and matrix through ONE handle. Previously the file was opened
# here but numpy.savetxt reopened the same path by name, so the header was
# lost or clobbered, and the first handle was never closed.
with open(r"Trimap_2D.txt", "w") as output:
    output.write("{} {} \n".format(embedding.shape[0], embedding.shape[1]))
    numpy.savetxt(output, embedding)
Esempio n. 10
0
# Script fragment: embed the first `n` samples with TriMap, show them as a
# scatter plot coloured by label, then print an AUC score for the embedding.
# NOTE(review): `X`, `L`, `n`, `cols`, `trimap`, `plt` and `calculate_AUC` are
# defined outside this fragment — confirm their definitions before reuse.

# Optional preprocessing, currently disabled:
# X = X / 255.
# L = L[:10000]

# Keep only the first n samples; labels are cast to int because they are used
# below as row indices into `cols`.
X = X[:n]
L = L[:n].astype(int)
print("Dataset size = ({},{})".format(X.shape[0], X.shape[1]))

# Plain TriMap run, currently disabled:
# y_trimap = trimap.TRIMAP(verbose=True).fit_transform(X)

# Disabled experiment: push one sample along a random direction to create an
# artificial outlier (Xo) before embedding.
# # Outlier
# index = 9423
# c = np.random.normal(size=X.shape[1]) # create a random direction
# Xo = X.copy()
# Xo[index,:] += 5.0 * c

# NOTE(review): `hub='mp3_gauss'` presumably requires a TriMap build that
# accepts a `hub` argument — verify against the installed trimap version.
yo_trimap = trimap.TRIMAP(verbose=True, hub='mp3_gauss').fit_transform(X)
# Alternative embedding, currently disabled:
# yo_trimap = umap.UMAP().fit_transform(X)

# One small dot per sample, coloured via the per-label rows of `cols`.
plt.scatter(yo_trimap[:, 0], yo_trimap[:, 1], s=0.1, c=cols[L, :])
# plt.scatter(yo_trimap[index,0], yo_trimap[index,1], s=80, c='red', marker='x')
plt.show()

# Disabled PCA comparison on the outlier data set:
# yo_pca = PCA(n_components = 2).fit_transform(Xo)
# plt.scatter(yo_pca[:,0], yo_pca[:,1], s=0.1, c=cols[L,:])
# plt.scatter(yo_pca[index,0], yo_pca[index,1], s=80, c='red', marker='x')
# plt.show()

# AUC computed by `calculate_AUC` from the original data and its embedding.
auc = calculate_AUC(X, yo_trimap)
print("AUC: ", auc)
Esempio n. 11
0
    def __init__(self, outdim=2, **kwargs):
        """Create the TriMap reducer with `outdim` output dimensions.

        NOTE(review): extra keyword arguments are accepted but not forwarded
        to TRIMAP — confirm whether that is intentional.
        """
        # Imported lazily so trimap is only required when this class is used.
        from trimap import TRIMAP

        self.reducer = TRIMAP(n_dims=outdim)
Esempio n. 12
0
def apply_panel_of_manifold_learning_methods(X,color,
                                Color_by_branches=None,precomputed_results=None,
                                color_map='cool',ColorByFeature='',
                                variable_names=None,ElMapFolder='',
                                n_neighbors=20, n_components = 2,
                                title_fontsize = 30,points_size = 30,
                                methods_to_apply=None,
                                n_subplots_x = 4, n_subplots_y = 3,
                                figsizex = 20, figsizey =20):
    """
    Compute (or reuse) a panel of embeddings of ``X`` and draw them as scatter subplots.

    Supported method keys, in drawing order: 'PCA', 'LLE', 'MLLE', 'ISOMAP',
    'MDS', 'SE', 'TSNE', 'UMAP', 'TRIMAP', 'MDE', 'AUTOENCODER', 'VAE'.
    The first two columns of every embedding are plotted.

    Args:
        X: data matrix; rows are points (``X.shape[0]``), columns are features.
        color: per-point colors used unless overridden by the two options below.
        Color_by_branches: optional per-point branch labels. When non-empty,
            points are colored from a fixed RGB palette, the most frequent
            label getting the first palette entry.
        precomputed_results: optional dict ``{method key: embedding}``. When it
            is non-empty, embeddings found in it are reused instead of being
            recomputed. A passed dict is updated in place and returned.
        color_map: matplotlib colormap used for the scatter plots.
        ColorByFeature: name in ``variable_names``; when non-empty the matching
            column of ``X`` is used as color (takes precedence over the others).
        variable_names: column names of ``X``, used only with ``ColorByFeature``.
        ElMapFolder: currently unused.
        n_neighbors: neighborhood size for LLE, MLLE, Isomap, SpectralEmbedding
            and UMAP.
        n_components: embedding dimension for all methods except TRIMAP and
            MDE, which are called with their own settings.
        title_fontsize: font size of the subplot titles.
        points_size: marker size of the scatter plots.
        methods_to_apply: method keys to run; empty/None means all of them.
        n_subplots_x, n_subplots_y: subplot grid passed to ``fig.add_subplot``.
        figsizex, figsizey: figure size.

    Returns:
        Dict mapping each applied method key to its embedding.

    Raises:
        ValueError: ``ColorByFeature`` is set but not in ``variable_names``.
    """
    # None defaults instead of shared mutable ones: the old `{}` default was
    # mutated and returned, so results leaked from one call into the next and
    # silently switched later calls into "reuse" mode.
    if Color_by_branches is None:
        Color_by_branches = []
    if variable_names is None:
        variable_names = []
    if methods_to_apply is None:
        methods_to_apply = []
    viz_results = {} if precomputed_results is None else precomputed_results

    cmap = color_map
    plt.style.use('ggplot')
    fig = plt.figure(figsize=(figsizex, figsizey))
    apply_all = len(methods_to_apply) == 0

    color1 = color
    if len(Color_by_branches) > 0:
        color1 = _branch_palette_colors(Color_by_branches)

    if not ColorByFeature == '':
        color1 = X[:, variable_names.index(ColorByFeature)]

    # Decided once, before anything is added to viz_results: reuse is only
    # considered when the caller supplied at least one precomputed embedding.
    only_draw = len(viz_results) != 0

    print('Start computations...')

    def _min_max_scaled():
        # The decoders end in a sigmoid, so inputs are rescaled to [0, 1].
        return (X-X.min())/(X.max()-X.min())

    def _pca():
        return PCA(n_components=n_components).fit_transform(X)

    def _lle():
        print('Computing LLE...')
        return manifold.LocallyLinearEmbedding(n_neighbors=n_neighbors, n_components=n_components,
                                               eigen_solver='auto',
                                               method='standard').fit_transform(X)

    def _mlle():
        return manifold.LocallyLinearEmbedding(n_neighbors=n_neighbors, n_components=n_components,
                                               eigen_solver='auto',
                                               method='modified').fit_transform(X)

    def _isomap():
        return manifold.Isomap(n_neighbors=n_neighbors, n_components=n_components).fit_transform(X)

    def _mds():
        return manifold.MDS(n_components, max_iter=100, n_init=1).fit_transform(X)

    def _spectral():
        return manifold.SpectralEmbedding(n_components=n_components,
                                          n_neighbors=n_neighbors).fit_transform(X)

    def _tsne():
        return manifold.TSNE(n_components=n_components, init='pca',
                             random_state=0, perplexity=100).fit_transform(X)

    def _umap():
        return UMAP(n_neighbors=n_neighbors, n_components=n_components).fit_transform(X)

    def _trimap():
        return trimap.TRIMAP(verbose=False).fit_transform(X)

    def _mde():
        return pymde.preserve_neighbors(X, embedding_dim=2, verbose=False).embed()

    def _autoencoder():
        layer_sizes = [64,32,16,8,4]
        # encoder
        inputs = Input(shape=(X.shape[1],), name='encoder_input')
        x = inputs
        for size in layer_sizes:
            x = Dense(size, activation='relu',kernel_initializer='he_uniform')(x)
        latent = Dense(n_components,kernel_initializer='he_uniform', name='latent_vector')(x)
        encoder = Model(inputs, latent, name='encoder')

        # decoder
        latent_inputs = Input(shape=(n_components,), name='decoder_input')
        x = latent_inputs
        for size in layer_sizes[::-1]:
            x = Dense(size, activation='relu',kernel_initializer='he_uniform')(x)
        outputs = Dense(X.shape[1] ,activation='sigmoid',kernel_initializer='he_uniform',name='decoder_output')(x)
        decoder = Model(latent_inputs, outputs, name='decoder')

        autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder')
        autoencoder.compile(loss='mse', optimizer='adam')

        X_01 = _min_max_scaled()
        autoencoder.fit(x=X_01,y=X_01,epochs=200,verbose=0)
        return encoder.predict(X_01)

    def _vae():
        def sampling(args):
            z_mean, z_log_var = args
            epsilon = K.random_normal(shape=(n_components,))
            return z_mean + K.exp(z_log_var) * epsilon

        layer_sizes = [64,32,16,8]
        # encoder
        inputs = Input(shape=(X.shape[1],), name='encoder_input')
        x = inputs
        for size in layer_sizes:
            x = Dense(size, activation='relu',kernel_initializer='he_uniform')(x)

        z_mean = Dense(n_components,kernel_initializer='he_uniform', name='latent_mean')(x)
        z_log_var = Dense(n_components,kernel_initializer='he_uniform', name='latent_sigma')(x)

        z = Lambda(sampling, output_shape=(n_components,))([z_mean, z_log_var])
        encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')

        # decoder
        latent_inputs = Input(shape=(n_components,), name='decoder_input_sampling')
        x = latent_inputs
        for size in layer_sizes[::-1]:
            x = Dense(size, activation='relu',kernel_initializer='he_uniform')(x)
        outputs = Dense(X.shape[1] ,activation='sigmoid',kernel_initializer='he_uniform',name='decoder_output')(x)
        decoder = Model(latent_inputs, outputs, name='decoder')

        # The decoder is fed the sampled latent vector (third encoder output).
        vae = Model(inputs, decoder(encoder(inputs)[2]), name='vae')

        def vae_loss(x, x_decoded_mean):
            xent_loss = K.mean(K.square((x- x_decoded_mean)))
            kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
            return xent_loss+kl_loss
        vae.compile(optimizer='adam', loss=vae_loss)

        X_01 = _min_max_scaled()
        vae.fit(x=X_01,y=X_01,epochs=200,verbose=0)
        # The latent mean (first encoder output) is used as the embedding.
        return encoder.predict(X_01)[0]

    # (result key, label in the timing message, subplot title, compute function)
    panels = [
        ('PCA', 'PCA', 'PCA', _pca),
        ('LLE', 'LLE', 'LLE', _lle),
        ('MLLE', 'Modified LLE', 'MLLE', _mlle),
        ('ISOMAP', 'Isomap', 'Isomap', _isomap),
        ('MDS', 'MDS', 'MDS', _mds),
        ('SE', 'SpectralEmbedding', 'SpectralEmbedding', _spectral),
        ('TSNE', 't-SNE', 't-SNE', _tsne),
        ('UMAP', 'UMAP', 'UMAP', _umap),
        ('TRIMAP', 'TRIMAP', 'TRIMAP', _trimap),
        ('MDE', 'MDE', 'MDE', _mde),
        ('AUTOENCODER', 'Autoencoder', 'Autoencoder', _autoencoder),
        ('VAE', 'VAE', 'VAE', _vae),
    ]

    # Subplot numbering is kept as before: PCA draws into slot 1 and every
    # other method advances the slot first, so slot 1 stays empty without PCA.
    slot = 1
    for key, timing_label, title, compute in panels:
        if not (apply_all or key in methods_to_apply):
            continue

        t0 = time()
        if not only_draw or key not in viz_results:
            Y = compute()
            viz_results[key] = Y
        else:
            Y = viz_results[key]
        t1 = time()
        print("%s: %.2g sec" % (timing_label, t1 - t0))

        if key != 'PCA':
            slot += 1
        ax = fig.add_subplot(n_subplots_x, n_subplots_y, slot)
        plt.scatter(Y[:, 0], Y[:, 1], c=color1, cmap=cmap, s=points_size)
        plt.title(title, fontdict={'fontsize': title_fontsize})
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')

    plt.tight_layout()

    return viz_results


def _branch_palette_colors(branch_labels):
    """Return one RGB triple per label; more frequent labels get earlier palette entries."""
    color_seq = [[1,0,0],[0,1,0],[0,0,1],[0,1,1],[1,0,1],[1,1,0],
         [1,0,0.5],[1,0.5,0],[0.5,0,1],[0.5,1,0],
         [0.5,0.5,1],[0.5,1,0.5],[1,0.5,0.5],
         [0,0.5,0.5],[0.5,0,0.5],[0.5,0.5,0],[0.5,0.5,0.5],[0,0,0.5],[0,0.5,0],[0.5,0,0],
         [0,0.25,0.5],[0,0.5,0.25],[0.25,0,0.5],[0.25,0.5,0],[0.5,0,0.25],[0.5,0.25,0],
         [0.25,0.25,0.5],[0.25,0.5,0.25],[0.5,0.25,0.25],[0.25,0.25,0.5],[0.25,0.5,0.25],
         [0.25,0.25,0.5],[0.25,0.5,0.25],[0.5,0,0.25],[0.5,0.25,0.25]]
    unique_labels, label_counts = np.unique(branch_labels, return_counts=True)
    # Indices into unique_labels, most frequent first (ties keep sorted order).
    by_frequency = sorted(range(len(label_counts)), key=lambda k: label_counts[k], reverse=True)
    rank_of = {unique_idx: rank for rank, unique_idx in enumerate(by_frequency)}

    point_colors = []
    for label in branch_labels:
        unique_idx = int(np.where(unique_labels == label)[0][0])
        # The palette is cycled when there are more labels than colors.
        point_colors.append(color_seq[rank_of[unique_idx] % len(color_seq)])
    return point_colors
Esempio n. 13
0
def run_DR_Algorithm(name, data_features, data_target, int_dim=2):
    """
    Runs the requested DR algorithm, plots the embedding and returns it.
    Parameters
    ----------
    name : String
        name of the algorithm to run; one of "UMAP", "tSNE", "PCA",
        "Trimap", "FIt_SNE", "M_Core_tSNE", "dPCA", "LTSA", "MLLE",
        "openTSNE", "MDS", "Isomap", "KernelPCA", "LLE", "HessianLLE",
        "LEM" or "LVis"
    data_features : nD array
        original features
    data_target : list
        original labels (only passed on to ``plot_scatter``)
    int_dim : integer
        intrinsic dimensionality, i.e. the number of output dimensions
        requested from the algorithm
    Returns
    ----------
    points : nD array
        embedding, exactly as returned by the underlying library
    Raises
    ----------
    ValueError
        if ``name`` is not one of the supported algorithm names
    """

    if name == "UMAP":
        # Forward int_dim like every other branch (UMAP's own default is 2).
        reducer = umap.UMAP(n_components=int_dim)
        points = reducer.fit_transform(data_features)

    elif name == "tSNE":
        tsne = TSNE(n_components=int_dim,
                    n_iter=1000,
                    random_state=RANDOM_STATE)
        points = tsne.fit_transform(data_features)

    elif name == "PCA":
        pca = PCA(n_components=int_dim)
        points = pca.fit_transform(data_features)

    elif name == "Trimap":
        # Forward int_dim like every other branch (TRIMAP's own default is 2).
        points = trimap.TRIMAP(n_dims=int_dim).fit_transform(data_features)

    elif name == "FIt_SNE":
        # NOTE(review): int_dim is not forwarded here; the output dimension
        # is whatever fast_tsne defaults to -- confirm this is intended.
        points = fast_tsne(data_features, perplexity=50, seed=42)

    elif name == "M_Core_tSNE":
        # NOTE(review): int_dim is not forwarded here either -- confirm.
        tsne = M_TSNE(n_jobs=4)
        points = tsne.fit_transform(data_features)

    elif name == "dPCA":
        dpca = dPCA.dPCA(labels='st', n_components=int_dim)
        points = dpca.fit_transform(data_features)

    elif name == "LTSA":
        ltsa = LocallyLinearEmbedding(n_neighbors=12,
                                      n_components=int_dim,
                                      method='ltsa')
        points = ltsa.fit_transform(data_features)

    elif name == "MLLE":
        mlle = LocallyLinearEmbedding(n_neighbors=6,
                                      n_components=int_dim,
                                      method='modified')
        points = mlle.fit_transform(data_features)

    elif name == "openTSNE":
        tsne = opTSNE(
            n_components=int_dim,
            perplexity=30,
            learning_rate=200,
            n_jobs=4,
            initialization="pca",
            metric="euclidean",
            early_exaggeration_iter=250,
            early_exaggeration=12,
            n_iter=750,
            neighbors="exact",
            negative_gradient_method="bh",
        )
        # Unlike the scikit-learn style estimators above, the embedding is
        # taken from fit() rather than fit_transform().
        points = tsne.fit(data_features)

    elif name == "MDS":
        mds = MDS(n_components=int_dim)
        points = mds.fit_transform(data_features)

    elif name == "Isomap":
        isomap = Isomap(n_components=int_dim)
        points = isomap.fit_transform(data_features)

    elif name == "KernelPCA":
        kpca = KernelPCA(n_components=int_dim, kernel='linear')
        points = kpca.fit_transform(data_features)

    elif name == "LLE":
        lle = LocallyLinearEmbedding(n_components=int_dim)
        points = lle.fit_transform(data_features)

    elif name == "HessianLLE":
        hessian_lle = LocallyLinearEmbedding(n_neighbors=6,
                                             n_components=int_dim,
                                             method='hessian')
        points = hessian_lle.fit_transform(data_features)

    elif name == "LEM":
        lapeig = SpectralEmbedding(n_components=int_dim)
        points = lapeig.fit_transform(data_features)

    elif name == "LVis":
        outdim = int_dim
        threads = 24
        # NOTE(review): -1 presumably asks LargeVis to use its built-in
        # default for each setting -- confirm against the LargeVis docs.
        samples = -1
        prop = -1
        alpha = -1
        trees = -1
        neg = -1
        neigh = -1
        gamma = -1
        perp = -1

        # DataFrame.as_matrix() was removed in pandas 1.0; np.asarray accepts
        # both DataFrames and plain arrays, and is evaluated only once.
        matrix = np.asarray(data_features)

        # LargeVis reads its input from a text file: a "rows<TAB>cols"
        # header followed by one tab-separated line per sample.
        with open('largevis_input.txt', 'w') as out:
            out.write("{}\t{}\n".format(*matrix.shape))
            for row in matrix:
                out.write('\t'.join(row.astype(str)) + '\n')

        LargeVis.loadfile("largevis_input.txt")
        points = LargeVis.run(outdim, threads, samples, prop, alpha, trees,
                              neg, neigh, gamma, perp)

    else:
        # Previously an unknown name surfaced as an UnboundLocalError on the
        # return statement; fail with a message that names the problem.
        raise ValueError(
            "Unknown dimensionality reduction algorithm: {!r}".format(name))

    # Only the LargeVis result is converted to an array for plotting; every
    # other embedding is plotted exactly as returned. The value handed back
    # to the caller is never converted.
    plot_scatter(np.array(points) if name == "LVis" else points, data_target)

    return points
Esempio n. 14
0
 def do_trimap():
     """Fit a non-verbose TRIMAP on ``X`` and return the transformed data.

     ``X`` and ``embed_dimensions`` are not parameters; they are resolved
     from the surrounding scope when the function is called.
     """
     reducer = trimap.TRIMAP(n_dims=embed_dimensions, verbose=False)
     embedding = reducer.fit_transform(X)
     return embedding
Esempio n. 15
0
def reduceDimension(adata,
                    n_pca_components=25,
                    n_components=2,
                    n_neighbors=10,
                    reduction_method='trimap',
                    velocity_key='velocity_S',
                    cores=1):
    """Compute a low dimension projection of an AnnData object, first with truncated SVD ("PCA"), followed by a non-linear dimension reduction method

    Arguments
    ---------
    adata: :class:`~anndata.AnnData`
        an AnnData object. If `adata.var` has a `use_for_dynamo` column, only the genes it selects are used.
    n_pca_components: 'int' (optional, default 25)
        Number of PCA components to keep.
    n_components: 'int' (optional, default 2)
        The dimension of the space to embed into. Only forwarded to the `psl` method.
    n_neighbors: 'int' (optional, default 10)
        Number of nearest neighbors. Forwarded to the `psl` method and recorded in `adata.uns['neighbors']['params']` for the other methods.
    reduction_method: 'str' (optional, default trimap)
        Non-linear dimension reduction method applied to the PCA components. One of `trimap`, `tSNE` (FIt-SNE is used instead of
        traditional tSNE), `umap` or `psl` (probabilistic structure learning).
    velocity_key: 'str' or None (optional, default velocity_S)
        Key in `adata.layers` holding the estimated velocity values. When None, no velocity projection is computed.
    cores: `int` (optional, default `1`)
        Number of cores. Used only when the tSNE reduction_method is used.

    Returns
    -------
    The same `adata`, updated in place: `obsm['X_pca']`, `obsm['_velocity_pca']` (when a velocity key is given and the entry is not
    already present), `obsm['X_<method>']`, and either `uns['neighbors']` (trimap, tSNE, umap) or `uns['PSL_adj_mat']` (psl).

    Raises
    ------
    ImportError
        If `reduction_method` is `tSNE` and the `fitsne` package is not installed.
    ValueError
        If `reduction_method` is not one of the supported methods.
    """

    if 'use_for_dynamo' in adata.var.keys():
        gene_mask = adata.var.use_for_dynamo.values
        X = adata.X[:, gene_mask]
        if velocity_key is not None:
            # Expression plus velocity: the state used for the velocity projection.
            X_t = X + adata.layers[velocity_key][:, gene_mask]
    else:
        X = adata.X
        if velocity_key is not None:
            X_t = adata.X + adata.layers[velocity_key]

    # Strings are compared with ==, never `is`: identity of equal strings is
    # an interning accident and fails for strings built at runtime.
    needs_pca = ('X_pca' not in adata.obsm.keys()
                 or 'pca_fit' not in adata.uns.keys()
                 or reduction_method == "pca")
    # NOTE(review): "pca" forces the recomputation below but is not handled by
    # the dispatch further down, so it still ends in the "not supported" error
    # (same as before) -- confirm whether it should simply return after PCA.

    if needs_pca:
        # One extra component is fitted and the first one dropped.
        # NOTE(review): presumably because the leading component of the
        # uncentered SVD mostly reflects the mean -- confirm.
        transformer = TruncatedSVD(n_components=n_pca_components + 1,
                                   random_state=0)
        X_fit = transformer.fit(X)
        X_pca = X_fit.transform(X)[:, 1:]
        adata.obsm['X_pca'] = X_pca
        if velocity_key is not None and "_velocity_pca" not in adata.obsm.keys():
            X_t_pca = X_fit.transform(X_t)[:, 1:]
            adata.obsm['_velocity_pca'] = X_t_pca - X_pca
    else:
        X_pca = adata.obsm['X_pca'][:, :n_pca_components]
        if velocity_key is not None and "_velocity_pca" not in adata.obsm.keys():
            # NOTE(review): this re-fits the stored model on X_t instead of
            # only transforming with it; left unchanged because the nature of
            # `pca_fit` is not visible here -- verify this is intended.
            X_t_pca = adata.uns['pca_fit'].fit_transform(X_t)
            adata.obsm['_velocity_pca'] = (
                X_t_pca[:, 1:(n_pca_components + 1)] - X_pca)
        adata.obsm['X_pca'] = X_pca

    if reduction_method == "trimap":
        import trimap
        triplemap = trimap.TRIMAP(
            n_inliers=20,
            n_outliers=10,
            n_random=10,
            distance='angular',  # cosine
            weight_adj=1000.0,
            apply_pca=False)
        X_dim = triplemap.fit_transform(X_pca)

        adata.obsm['X_trimap'] = X_dim
        adata.uns['neighbors'] = {
            'params': {'n_neighbors': n_neighbors, 'method': reduction_method},
            'connectivities': None,
            'distances': None,
            'indices': None,
        }
    elif reduction_method == 'tSNE':
        try:
            from fitsne import FItSNE
        except ImportError as err:
            # Fail here with the install hint instead of printing it and then
            # crashing on the unbound FItSNE name a few lines later.
            raise ImportError(
                'Please first install fitsne to perform accelerated tSNE method. Install instruction is provided here: https://pypi.org/project/fitsne/'
            ) from err

        X_dim = FItSNE(X_pca, nthreads=cores)  # use FitSNE

        adata.obsm['X_tSNE'] = X_dim
        adata.uns['neighbors'] = {
            'params': {'n_neighbors': n_neighbors, 'method': reduction_method},
            'connectivities': None,
            'distances': None,
            'indices': None,
        }
    elif reduction_method == 'umap':
        with warnings.catch_warnings():
            # Silence any warnings emitted while the umap helper runs.
            warnings.simplefilter("ignore")
            graph, knn_indices, knn_dists, X_dim = umap_conn_indices_dist_embedding(
                X_pca)
        adata.obsm['X_umap'] = X_dim
        adata.uns['neighbors'] = {
            'params': {'n_neighbors': n_neighbors, 'method': reduction_method},
            'connectivities': graph,
            'distances': knn_dists,
            'indices': knn_indices,
        }
    elif reduction_method == 'psl':
        adj_mat, X_dim = psl_py(X_pca, d=n_components,
                                K=n_neighbors)  # this need to be updated
        adata.obsm['X_psl'] = X_dim
        adata.uns['PSL_adj_mat'] = adj_mat

    else:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError(
            'reduction_method {} is not supported.'.format(reduction_method))

    return adata