Beispiel #1
0
def plot_pca2(df, filters=None):
    """Derive point styling and open a 10x8 inch figure for a PCA plot.

    Args:
        df: data handed to ``create_diagonal_trues`` when no filter is given.
        filters: optional object exposing ``.index``; when omitted, a default
            is built from ``df`` and its ``.shape`` is used instead.

    NOTE(review): the figure and axes are created but neither drawn on nor
    returned here -- confirm whether the rest of this function is missing.
    """
    # Sample names live in the index; markers and colours are derived from
    # the filter (or from the default built out of df).
    if filters is None:
        point_spec = create_diagonal_trues(df).shape
    else:
        point_spec = filters.index
    markers, colors = construct_point(point_spec)
    fig, ax = plt.subplots(figsize=(10, 8))
Beispiel #2
0
def plot_kmeans(df,
                k,
                n_clusts,
                title,
                col_names=('Principal Component 1', 'Principal Component 2'),
                save_dir="/Users/coltongarelli/Desktop/laura's LPP stuff"):
    """Scatter-plot k-means clusters, label every point and save a PDF.

    For each cluster id ``0 .. n_clusts - 1`` the rows of ``df`` assigned to
    that cluster are drawn on the current pyplot figure, the first two
    columns of those rows are printed, and every point is annotated with
    its index label.

    Args:
        df: DataFrame holding at least the two columns named in
            ``col_names``; its index supplies the point labels.
        k: cluster assignment compared element-wise against each cluster id
            (``k == n``) to build a boolean row mask for ``df``.
        n_clusts: number of clusters to draw.
        title: text inserted into the output file name
            (``kmeans_<title>_plot.pdf``).
        col_names: pair of column names used as x and y coordinates.
        save_dir: directory the PDF is written to. The default reproduces
            the previously hard-coded location.

    Returns:
        None. The plot is written to ``<save_dir>/kmeans_<title>_plot.pdf``.
    """
    # Kept for parity with the previous behaviour; the returned styling is
    # not applied because per-cluster colour/marker selection is disabled.
    markers, colors = helpers.construct_point(n_clusts)
    for n in range(n_clusts):
        in_cluster = k == n
        pc1 = df.loc[in_cluster, col_names[0]]
        pc2 = df.loc[in_cluster, col_names[1]]
        plt.scatter(
            pc1,
            pc2,
            edgecolor='black',
            label='Cluster {}'.format(n))
        print(df[in_cluster].iloc[:, :2], '\n')
        for name, x, y in zip(pc1.index.to_list(), pc1, pc2):
            plt.annotate(
                name,  # this is the text
                (x, y),  # this is the point to label
                textcoords="offset points",  # how to position the text
                xytext=(0, 10),  # distance from text to points (x,y)
                ha='center',  # horizontal alignment: left, right or center
                size=6)
    if n_clusts > 0:
        # Build the legend and write the file once, after every cluster has
        # been drawn, instead of rewriting the same PDF on each iteration.
        # Skipped when nothing was drawn, as before.
        plt.legend()
        plt.savefig('{}/kmeans_{}_plot.pdf'.format(save_dir, title))
Beispiel #3
0
def plot_pca(df, rows, batch: bool = False):
    """Plot PCA data on a scatter plot where x = PC1 and y = PC2.

    Plots the first two principal components of PC-analyzed data. Plotting
    PCs can be used as a measure for batch effect or correction. PCA can
    also be used to determine correlation of a list of genes of interest.

    Notes:
        - Perform PCA on specific genes to utilize PCA for non-batch analysis.
        - Each key of ``rows`` is also used as a position into the label
          list (``labels[k]``), so keys are expected to be integers within
          range of that list.
        - The legend is removed before returning; labels remain attached to
          the scatter artists only.

    Args:
        df: DataFrame with 'Principal Component 1' and
            'Principal Component 2' columns; its index holds the sample
            names used for annotations.
        rows: dictionary mapping each sample or batch key to a ``df.loc``
            row selector for the points belonging to it.
        batch: when True, groups are labelled 'Batch 1', 'Batch 2', ...;
            otherwise the index labels of ``df`` are used. Consider renaming
            to 'group' for generic grouping.

    Returns:
        Figure and Axes objects.
    """
    markers, colors = construct_point(df.shape[0])
    cdict = dict(zip(rows.keys(), colors))
    marker = dict(zip(rows.keys(), markers))
    fig, ax = plt.subplots(figsize=(10, 8))
    if batch:
        labels = ['Batch {}'.format(i + 1) for i in range(len(colors))]
    else:
        labels = df.index.to_list()
    for k, v in rows.items():
        ax.scatter(x=df.loc[v, 'Principal Component 1'],
                   y=df.loc[v, 'Principal Component 2'],
                   c=[cdict.get(k)],
                   marker=marker.get(k),
                   label=labels[k],
                   s=55)
    ax.set_xlabel("Principal Component 1", fontsize=15)
    ax.set_ylabel("Principal Component 2", fontsize=15)

    # Walk the rows positionally so each annotation receives plain scalar
    # coordinates. Selecting with a list of columns would hand matplotlib a
    # one-element Series per coordinate, and a label lookup would break on
    # duplicated index labels.
    pc1 = df['Principal Component 1']
    pc2 = df['Principal Component 2']
    for name, x, y in zip(df.index.to_list(), pc1, pc2):
        ax.annotate(name, (x + .05, y + .05))
    fig.tight_layout()
    ax.legend().remove()
    return fig, ax
Beispiel #4
0
def plotly_3d_pca(df, batch: bool = False):
    """Render the first three principal components as a 3D plotly scatter.

    Args:
        df: DataFrame with 'Principal Component 1', 'Principal Component 2'
            and 'Principal Component 3' columns; its index supplies the
            per-point text when ``batch`` is False.
        batch: when True, the point text is 'Batch 1', 'Batch 2', ... (one
            entry per colour returned by ``construct_point``); otherwise
            the index labels of ``df`` are used.

    Returns:
        The ``go.Figure`` that was rendered with ``plotly.offline.plot``.
    """
    markers, colors = construct_point(df.shape[0])
    if batch:
        # consider changing batch to 'group' for generic grouping
        # NOTE(review): assumes construct_point yields one colour per row
        # of df -- confirm against its implementation.
        labels = ['Batch {}'.format(i + 1) for i in range(len(colors))]
    else:
        labels = df.index.to_list()
    fig = go.Figure(
        go.Scatter3d(x=df['Principal Component 1'],
                     y=df['Principal Component 2'],
                     z=df['Principal Component 3'],
                     mode='markers',
                     text=labels))
    fig.update_traces(textposition='top center')
    fig.update_layout(title_text='3D PCA of LPP Nanostring by groups')
    fig.update_layout(scene=dict(xaxis_title='Principal Component 1',
                                 yaxis_title='Principal Component 2',
                                 zaxis_title='Principal Component 3'))
    # plotly.offline.plot returns a string (output path or div), not a
    # Figure, so style the figure first and render it last.
    plotly.offline.plot(fig)
    return fig