import copy

import ot


def distance(character1, character2, novelLength, t):
    """Wasserstein distance between two characters' appearance positions in a novel."""
    character1 = copy.deepcopy(character1)
    character2 = copy.deepcopy(character2)

    # The first character needs to have more appearances
    if len(character1) < len(character2):
        character1, character2 = character2, character1

    character1new = []
    used_list = []

    # Greedily pick a subset of the first character's appearances so that the
    # selected appearances are as close as possible to the second character's
    for i in range(len(character2)):

        min_distance = float('inf')
        j_nearest = None

        for j in range(len(character1)):

            curr_distance = abs(character1[j] - character2[i])

            if curr_distance < min_distance and j not in used_list:
                min_distance = curr_distance
                j_nearest = j

        character1new.append(character1[j_nearest])
        used_list.append(j_nearest)

    character1new.sort()

    # Normalize appearance positions by dividing them by the novel length
    for i in range(len(character1new)):
        character1new[i] /= novelLength
        character2[i] /= novelLength

    # Raise elements to the power of 1 + t
    for i in range(len(character1new)):
        character1new[i] **= (1 + t)
        character2[i] **= (1 + t)

    return ot.wasserstein_1d(character1new, character2, p=0.5)
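# A minimal usage sketch (not from the source): the appearance lists and the
# novel length below are made-up illustrative values, assuming appearances
# are token offsets into the novel.
alice = [10, 250, 900, 1500, 4200]   # hypothetical appearance positions
bob = [300, 1480, 4100]
print(distance(alice, bob, novelLength=5000, t=0))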
Example #2
import numpy as np

import ot


def test_wass_1d():
    # check that wasserstein_1d gives results consistent with ot.emd
    n = 20
    m = 30
    rng = np.random.RandomState(0)
    u = rng.randn(n, 1)
    v = rng.randn(m, 1)

    M = ot.dist(u, v, metric='sqeuclidean')

    # empty weight lists default to uniform distributions
    G, log = ot.emd([], [], M, log=True)
    wass = log["cost"]

    wass1d = ot.wasserstein_1d(u, v, [], [], p=2.)

    # the 2-Wasserstein distance is the square root of the EMD cost
    # under the squared Euclidean ground metric
    np.testing.assert_allclose(np.sqrt(wass), wass1d)
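# Cross-check sketch (an assumption, not part of the original test): for p=1
# with uniform weights, ot.wasserstein_1d should agree with scipy's
# wasserstein_distance, which also computes the 1-D 1-Wasserstein distance.
def test_wass_1d_against_scipy():
    from scipy.stats import wasserstein_distance

    rng = np.random.RandomState(0)
    x, y = rng.randn(20), rng.randn(30)

    w_pot = ot.wasserstein_1d(x, y, [], [], p=1.)
    w_scipy = wasserstein_distance(x, y)
    np.testing.assert_allclose(w_pot, w_scipy)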
Example #3
import numpy as np

import ot


def predictive_distribution_wasserstein_distance(predictive_distribution1,
                                                 predictive_distribution2,
                                                 n_samples=1000,
                                                 seed=0):
    """Mean per-test-point 1-D Wasserstein distance between two predictive distributions."""
    predictive_samples1 = np.squeeze(
        predictive_distribution1.sample(n_samples, seed=seed).numpy())
    predictive_samples2 = np.squeeze(
        predictive_distribution2.sample(n_samples, seed=seed + 1).numpy())
    wds = []
    for i_test_point in range(predictive_samples1.shape[1]):
        samples1 = predictive_samples1[:, i_test_point]
        samples2 = predictive_samples2[:, i_test_point]
        # ot's Wasserstein distance is about 30% faster than scipy's
        # wd = wasserstein_distance(samples1, samples2)
        wd = ot.wasserstein_1d(samples1, samples2)
        wds.append(wd)
    return np.mean(wds)
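# Hypothetical usage sketch (assumes TensorFlow Probability distributions,
# which match the .sample(n, seed=...).numpy() calls above; the Normal
# parameters below are made up):
import tensorflow_probability as tfp

d1 = tfp.distributions.Normal(loc=np.zeros(5), scale=1.0)   # 5 test points
d2 = tfp.distributions.Normal(loc=0.1 * np.ones(5), scale=1.2)
print(predictive_distribution_wasserstein_distance(d1, d2, n_samples=500))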

# calculate average Wp distance with 100 points in test dataset
from ot import wasserstein_1d
predlist = []
predlistY = []
ideallist = []
for i in range(100):
    Y_c, W_c = reg.predict_distribution(X[N_train + i])
    predlist += [
        wasserstein_1d(p=2,
                       x_a=np.random.choice(Y_c, p=W_c, size=N_train),
                       x_b=np.random.normal(obj_func(X[N_train + i]),
                                            np.sqrt(obj_func2(X[N_train + i])),
                                            100000))
    ]
    predlistY += [
        wasserstein_1d(p=2,
                       x_a=Y[:N_train],
                       x_b=np.random.normal(obj_func(X[N_train + i]),
                                            np.sqrt(obj_func2(X[N_train + i])),
                                            100000))
    ]
    ideallist += [
        wasserstein_1d(p=2,
                       x_a=np.random.normal(obj_func(X[N_train + i]),
                                            np.sqrt(obj_func2(X[N_train + i])),
                                            N_train),
                       # NOTE: the tail of this call was cut off in the source;
                       # following the pattern of predlist above, x_b is
                       # presumably the large reference sample from the true
                       # distribution
                       x_b=np.random.normal(obj_func(X[N_train + i]),
                                            np.sqrt(obj_func2(X[N_train + i])),
                                            100000))
    ]
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from ot import wasserstein_1d
from scipy.spatial.distance import cdist
from scipy.stats import pearsonr


def distance_stats(pre, post, downsample=False, verbose=True):
    """
    Tests for correlation between Euclidean cell-cell distances before and after 
    transformation by a function or DR algorithm.

    Parameters
    ----------

    pre : np.array
        vector of unique distances (pdist()) or distance matrix of shape (n_cells, 
        m_cells), i.e. (cdist()) before transformation/projection
    post : np.array
        vector of unique distances (pdist()) or distance matrix of shape (n_cells, 
        m_cells), i.e. (cdist()) after transformation/projection
    downsample : int, optional (default=False)
        number of distances to downsample to, or False to keep all. a maximum of
        50M (~10k cells, if symmetrical) is recommended for performance.
    verbose : bool, optional (default=True)
        print progress statements to console

    Returns
    -------

    pre : np.array
        vector of normalized unique distances (pdist()) or distance matrix of shape 
        (n_cells, m_cells), before transformation/projection
    post : np.array
        vector of normalized unique distances (pdist()) or distance matrix of shape 
        (n_cells, m_cells), after transformation/projection
    corr_stats : list
        output of `pearsonr()` function correlating the two normalized unique distance 
        vectors
    EMD : float
        output of `wasserstein_1d()` function calculating the Earth Mover's Distance 
        between the two normalized unique distance vectors

    This function:
    1) performs Pearson correlation of distance distributions
    2) normalizes unique distances using min-max standardization for each dataset
    3) calculates Wasserstein or Earth Mover's Distance for normalized distance
       distributions between datasets
    """
    # make sure the number of cells in each matrix is the same
    assert (
        pre.shape == post.shape
    ), 'Matrices contain different number of distances.\n{} in "pre"\n{} in "post"\n'.format(
        pre.shape[0], post.shape[0])

    # if distance matrix (mA x mB, result of cdist), flatten to unique cell-cell distances
    if pre.ndim == 2:
        if verbose:
            print(
                "Flattening pre-transformation distance matrix into 1D array..."
            )
        # if symmetric, only keep unique values (above diagonal)
        if np.allclose(pre, pre.T, rtol=1e-05, atol=1e-08):
            pre = pre[np.triu_indices(n=pre.shape[0], k=1)]
        # otherwise, flatten all distances
        else:
            pre = pre.flatten()

    # if distance matrix (mA x mB, result of cdist), flatten to unique cell-cell distances
    if post.ndim == 2:
        if verbose:
            print(
                "Flattening post-transformation distance matrix into 1D array..."
            )
        # if symmetric, only keep unique values (above diagonal)
        if np.allclose(post, post.T, rtol=1e-05, atol=1e-08):
            post = post[np.triu_indices(n=post.shape[0], k=1)]
        # otherwise, flatten all distances
        else:
            post = post.flatten()

    # if dataset is large, randomly downsample to reasonable number of distances for calculation
    if downsample:
        assert downsample < len(
            pre
        ), "Must provide downsample value smaller than total number of cell-cell distances provided in pre and post"
        if verbose:
            print("Downsampling to {} total cell-cell distances...".format(
                downsample))
        idx = np.random.choice(np.arange(len(pre)), downsample, replace=False)
        pre = pre[idx]
        post = post[idx]

    # calculate correlation coefficient using Pearson correlation
    if verbose:
        print("Correlating distances")
    corr_stats = pearsonr(x=pre, y=post)

    # min-max normalization for fair comparison of probability distributions
    if verbose:
        print("Normalizing unique distances")
    pre -= pre.min()
    pre /= pre.ptp()

    post -= post.min()
    post /= post.ptp()

    # calculate EMD for the distance matrices
    # by default, downsample to 50M distances to speed processing time,
    # since this function often breaks with larger distributions
    if verbose:
        print("Calculating Earth-Mover's Distance between distributions")
    if len(pre) > 50000000:
        idx = np.random.choice(np.arange(len(pre)), 50000000, replace=False)
        pre_EMD = pre[idx]
        post_EMD = post[idx]
        EMD = wasserstein_1d(pre_EMD, post_EMD)
    else:
        EMD = wasserstein_1d(pre, post)

    return pre, post, corr_stats, EMD
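# A minimal usage sketch (names below are illustrative, not from the source):
# compare pairwise distances before and after a PCA projection.
from scipy.spatial.distance import pdist
from sklearn.decomposition import PCA

X = np.random.rand(200, 50)                      # 200 cells x 50 features
X_pca = PCA(n_components=2).fit_transform(X)
pre_n, post_n, corr, emd = distance_stats(pdist(X), pdist(X_pca))
print("pearson r = {:.3f}, EMD = {:.3f}".format(corr[0], emd))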
def cluster_arrangement_sc(
        adata,
        pre,
        post,
        obs_col,
        IDs,
        ID_names=None,
        figsize=(4, 4),
        legend=True,
        ax_labels=["Native", "Latent"],
):
    """
    Determines pairwise distance preservation between 3 IDs from `adata.obs[obs_col]`

    Parameters
    ----------

    adata : anndata.AnnData
        anndata object to pull dimensionality reduction from
    pre : np.array
        matrix to subset as pre-transformation (i.e. `adata.X`)
    post : np.array
        matrix to subset as post-transformation (i.e. `adata.obsm["X_pca"]`)
    obs_col : str
        name of column in `adata.obs` to use as cell IDs (i.e. "louvain")
    IDs : list of int (len==3)
        list of THREE ID indices to compare (i.e. [0,1,2])
    ID_names : list of str (len==3), optional (default=None)
        list of three cluster names corresponding to `IDs` for labeling; if None,
        use `IDs`
    figsize : tuple of float, optional (default=(4,4))
        size of resulting figure
    legend : bool, optional (default=True)
        display legend on plot
    ax_labels : list of str (len==2), optional (default=["Native","Latent"])
        list of two strings for x and y axis labels, respectively. if False, exclude 
        axis labels.

    Returns
    -------

    corr_stats : list
        list of outputs of `pearsonr()` function correlating the three normalized 
        unique distance vectors in a pairwise fashion
    EMD : list
        list of outputs of `wasserstein_1d()` function calculating the Earth Mover's
        Distance between the three normalized unique distance vectors in a pairwise
        fashion

    Outputs jointplot with scatter of pairwise distance correlations, with marginal 
    KDE plots showing density of each native and latent distance vector
    """
    # distance calculations for pre_obj
    dist_0_1 = cdist(pre[adata.obs[obs_col] == IDs[0]],
                     pre[adata.obs[obs_col] == IDs[1]]).flatten()
    dist_0_2 = cdist(pre[adata.obs[obs_col] == IDs[0]],
                     pre[adata.obs[obs_col] == IDs[2]]).flatten()
    dist_1_2 = cdist(pre[adata.obs[obs_col] == IDs[1]],
                     pre[adata.obs[obs_col] == IDs[2]]).flatten()
    # combine and min-max normalize
    dist = np.append(np.append(dist_0_1, dist_0_2), dist_1_2)
    dist -= dist.min()
    dist /= dist.ptp()
    # split normalized distances by cluster pair
    dist_norm_0_1 = dist[:dist_0_1.shape[0]]
    dist_norm_0_2 = dist[dist_0_1.shape[0]:dist_0_1.shape[0] +
                         dist_0_2.shape[0]]
    dist_norm_1_2 = dist[dist_0_1.shape[0] + dist_0_2.shape[0]:]

    # distance calculations for post_obj
    post_0_1 = cdist(post[adata.obs[obs_col] == IDs[0]],
                     post[adata.obs[obs_col] == IDs[1]]).flatten()
    post_0_2 = cdist(post[adata.obs[obs_col] == IDs[0]],
                     post[adata.obs[obs_col] == IDs[2]]).flatten()
    post_1_2 = cdist(post[adata.obs[obs_col] == IDs[1]],
                     post[adata.obs[obs_col] == IDs[2]]).flatten()
    # combine and min-max normalize
    post = np.append(np.append(post_0_1, post_0_2), post_1_2)
    post -= post.min()
    post /= post.ptp()
    # split normalized distances by cluster pair
    post_norm_0_1 = post[:post_0_1.shape[0]]
    post_norm_0_2 = post[post_0_1.shape[0]:post_0_1.shape[0] +
                         post_0_2.shape[0]]
    post_norm_1_2 = post[post_0_1.shape[0] + post_0_2.shape[0]:]

    # calculate EMD and Pearson correlation stats
    EMD = [
        wasserstein_1d(dist_norm_0_1, post_norm_0_1),
        wasserstein_1d(dist_norm_0_2, post_norm_0_2),
        wasserstein_1d(dist_norm_1_2, post_norm_1_2),
    ]
    corr_stats = [
        pearsonr(x=dist_0_1, y=post_0_1)[0],
        pearsonr(x=dist_0_2, y=post_0_2)[0],
        pearsonr(x=dist_1_2, y=post_1_2)[0],
    ]

    if ID_names is None:
        ID_names = IDs.copy()

    # generate jointplot
    g = sns.JointGrid(x=dist, y=post, space=0, height=figsize[0])
    g.plot_joint(plt.hist2d, bins=50, cmap=sns.cubehelix_palette(as_cmap=True))
    sns.kdeplot(
        dist_norm_0_1,
        shade=False,
        bw_method=0.01,
        ax=g.ax_marg_x,
        color="darkorange",
        label=ID_names[0] + " - " + ID_names[1],
        legend=legend,
    )
    sns.kdeplot(
        dist_norm_0_2,
        shade=False,
        bw_method=0.01,
        ax=g.ax_marg_x,
        color="darkgreen",
        label=ID_names[0] + " - " + ID_names[2],
        legend=legend,
    )
    sns.kdeplot(
        dist_norm_1_2,
        shade=False,
        bw_method=0.01,
        ax=g.ax_marg_x,
        color="darkred",
        label=ID_names[1] + " - " + ID_names[2],
        legend=legend,
    )
    if legend:
        g.ax_marg_x.legend(loc=(1.01, 0.1))

    sns.kdeplot(
        y=post_norm_0_1,
        shade=False,
        bw_method=0.01,
        color="darkorange",
        ax=g.ax_marg_y,
    )
    sns.kdeplot(
        y=post_norm_0_2,
        shade=False,
        bw_method=0.01,
        color="darkgreen",
        ax=g.ax_marg_y,
    )
    sns.kdeplot(
        y=post_norm_1_2,
        shade=False,
        bw_method=0.01,
        color="darkred",
        ax=g.ax_marg_y,
    )
    g.ax_joint.plot(
        np.linspace(max(dist.min(), post.min()), 1, 100),
        np.linspace(max(dist.min(), post.min()), 1, 100),
        linestyle="dashed",
        color=sns.cubehelix_palette()[-1],
    )  # plot identity line as reference for regression
    if ax_labels:
        plt.xlabel(ax_labels[0],
                   fontsize="xx-large",
                   color=sns.cubehelix_palette()[-1])
        plt.ylabel(ax_labels[1],
                   fontsize="xx-large",
                   color=sns.cubehelix_palette()[2])

    plt.tick_params(labelleft=False, labelbottom=False)

    return corr_stats, EMD
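# Hypothetical usage sketch (assumes a scanpy-processed AnnData with louvain
# clusters in .obs and a PCA embedding in .obsm; all names are assumptions):
corr, emd = cluster_arrangement_sc(
    adata,
    pre=adata.X,
    post=adata.obsm["X_pca"],
    obs_col="louvain",
    IDs=["0", "1", "2"],
    ID_names=["cluster 0", "cluster 1", "cluster 2"],
)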
Example #8
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from ot import wasserstein_1d
from scipy.stats import pearsonr


def cluster_arrangement(
    pre_obj,
    post_obj,
    clusters,
    cluster_names=None,
    figsize=(6, 6),
    pre_transform="arcsinh",
    legend=True,
    ax_labels=["Native", "Latent"],
):
    """
    determine pairwise distance preservation between 3 clusters
        pre_obj = RNA_counts object
        post_obj = DR object
        clusters = list of barcode IDs i.e. ['0','1','2'] to calculate pairwise distances between clusters 0, 1 and 2
        cluster_names = list of cluster names for labeling i.e. ['Bipolar Cells','Rods','Amacrine Cells'] for clusters
            0, 1 and 2, respectively
        figsize = size of output figure to plot
        pre_transform = apply transformation to pre_obj counts? (None, 'arcsinh', or 'log2')
        legend = display legend on plot
        ax_labels = list of two strings for x and y axis labels, respectively. if False, exclude axis labels.
    """
    # distance calculations for pre_obj
    dist_0_1 = pre_obj.barcode_distance_matrix(
        ranks=[clusters[0], clusters[1]], transform=pre_transform
    ).flatten()
    dist_0_2 = pre_obj.barcode_distance_matrix(
        ranks=[clusters[0], clusters[2]], transform=pre_transform
    ).flatten()
    dist_1_2 = pre_obj.barcode_distance_matrix(
        ranks=[clusters[1], clusters[2]], transform=pre_transform
    ).flatten()
    # combine and min-max normalize
    dist = np.append(np.append(dist_0_1, dist_0_2), dist_1_2)
    dist -= dist.min()
    dist /= dist.ptp()
    # split normalized distances by cluster pair
    dist_norm_0_1 = dist[: dist_0_1.shape[0]]
    dist_norm_0_2 = dist[dist_0_1.shape[0] : dist_0_1.shape[0] + dist_0_2.shape[0]]
    dist_norm_1_2 = dist[dist_0_1.shape[0] + dist_0_2.shape[0] :]

    # distance calculations for post_obj
    post_0_1 = post_obj.barcode_distance_matrix(
        ranks=[clusters[0], clusters[1]]
    ).flatten()
    post_0_2 = post_obj.barcode_distance_matrix(
        ranks=[clusters[0], clusters[2]]
    ).flatten()
    post_1_2 = post_obj.barcode_distance_matrix(
        ranks=[clusters[1], clusters[2]]
    ).flatten()
    # combine and min-max normalize
    post = np.append(np.append(post_0_1, post_0_2), post_1_2)
    post -= post.min()
    post /= post.ptp()
    # split normalized distances by cluster pair
    post_norm_0_1 = post[: post_0_1.shape[0]]
    post_norm_0_2 = post[post_0_1.shape[0] : post_0_1.shape[0] + post_0_2.shape[0]]
    post_norm_1_2 = post[post_0_1.shape[0] + post_0_2.shape[0] :]

    # calculate EMD and Pearson correlation stats
    EMD = [
        wasserstein_1d(dist_norm_0_1, post_norm_0_1),
        wasserstein_1d(dist_norm_0_2, post_norm_0_2),
        wasserstein_1d(dist_norm_1_2, post_norm_1_2),
    ]
    corr_stats = [
        pearsonr(x=dist_0_1, y=post_0_1)[0],
        pearsonr(x=dist_0_2, y=post_0_2)[0],
        pearsonr(x=dist_1_2, y=post_1_2)[0],
    ]

    if cluster_names is None:
        cluster_names = clusters.copy()

    # generate jointplot
    g = sns.JointGrid(x=dist, y=post, space=0, height=figsize[0])
    g.plot_joint(plt.hist2d, bins=50, cmap=sns.cubehelix_palette(as_cmap=True))
    sns.kdeplot(
        x=dist_norm_0_1,
        shade=False,
        bw_method=0.01,
        ax=g.ax_marg_x,
        color="darkorange",
        label=cluster_names[0] + " - " + cluster_names[1],
        legend=legend,
    )
    sns.kdeplot(
        x=dist_norm_0_2,
        shade=False,
        bw_method=0.01,
        ax=g.ax_marg_x,
        color="darkgreen",
        label=cluster_names[0] + " - " + cluster_names[2],
        legend=legend,
    )
    sns.kdeplot(
        x=dist_norm_1_2,
        shade=False,
        bw_method=0.01,
        ax=g.ax_marg_x,
        color="darkred",
        label=cluster_names[1] + " - " + cluster_names[2],
        legend=legend,
    )
    if legend:
        g.ax_marg_x.legend(loc=(1.01, 0.1))

    sns.kdeplot(
        y=post_norm_0_1,
        shade=False,
        bw_method=0.01,
        color="darkorange",
        ax=g.ax_marg_y,
    )
    sns.kdeplot(
        y=post_norm_0_2,
        shade=False,
        bw_method=0.01,
        color="darkgreen",
        ax=g.ax_marg_y,
    )
    sns.kdeplot(
        y=post_norm_1_2,
        shade=False,
        bw_method=0.01,
        color="darkred",
        ax=g.ax_marg_y,
    )
    g.ax_joint.plot(
        np.linspace(max(dist.min(), post.min()), 1, 100),
        np.linspace(max(dist.min(), post.min()), 1, 100),
        linestyle="dashed",
        color=sns.cubehelix_palette()[-1],
    )  # plot identity line as reference for regression
    if ax_labels:
        plt.xlabel(ax_labels[0], fontsize="xx-large", color=sns.cubehelix_palette()[-1])
        plt.ylabel(ax_labels[1], fontsize="xx-large", color=sns.cubehelix_palette()[2])

    plt.tick_params(labelleft=False, labelbottom=False)

    return corr_stats, EMD
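# Hypothetical usage sketch (the RNA_counts and DR objects come from the
# source package and are not defined here; cluster IDs and names are
# assumptions for illustration):
corr, emd = cluster_arrangement(
    pre_obj=counts_obj,      # an RNA_counts object
    post_obj=dr_obj,         # a DR object
    clusters=["0", "1", "2"],
    cluster_names=["Bipolar Cells", "Rods", "Amacrine Cells"],
)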