def plot_pca(dat,
             colour_subgroups,
             p=None,
             components=(0, 1),
             marker_subgroups=None,
             ax=None,
             colour_map=None,
             marker_map=None,
             **kwargs):
    """
    Draw a scatter plot of two principal components, styled by subgroup.

    :param dat: Data to project. It is transposed before being handed to the PCA object, so it is presumably
    laid out as features (rows) x samples (columns) - TODO confirm against callers.
    :param colour_subgroups: Forwarded to scatter.scatter_with_colour_and_markers as `colour_subgroups`.
    :param p: Optional PCA object. When given, it is only used to transform `dat` (it is not refitted). When
    None, a new pca.PCA() is created and fitted to `dat`.
    :param components: Indexable holding the two (zero-based) component indices to plot on x and y.
    :param marker_subgroups: Forwarded to the scatter helper as `marker_subgroups`.
    :param ax: Forwarded to the scatter helper as `ax`.
    :param colour_map: Forwarded to the scatter helper as `colour_map`.
    :param marker_map: Forwarded to the scatter helper as `marker_map`.
    :param kwargs: Any further keyword arguments are forwarded to the scatter helper unchanged.
    :return: Tuple (p, ax): the PCA object used and the axes returned by the scatter helper.
    """
    if p is not None:
        # Caller supplied a PCA object: project onto its existing components.
        projected = p.transform(dat.transpose())
    else:
        p = pca.PCA()
        projected = p.fit_transform(dat.transpose())

    # Express the explained variance ratio as a percentage for the axis labels.
    pct_explained = p.explained_variance_ratio_ * 100.

    scatter_args = dict(
        colour_subgroups=colour_subgroups,
        colour_map=colour_map,
        marker_subgroups=marker_subgroups,
        marker_map=marker_map,
        ax=ax,
    )
    ax = scatter.scatter_with_colour_and_markers(projected[:, components], **scatter_args, **kwargs)

    # Axis labels use one-based component numbering.
    label_template = "PCA component %s (%.1f%%)"
    x_comp = components[0]
    y_comp = components[1]
    ax.set_xlabel(label_template % (x_comp + 1, pct_explained[x_comp]))
    ax.set_ylabel(label_template % (y_comp + 1, pct_explained[y_comp]))

    return p, ax
def plot_pca(dat,
             colour_subgroups,
             p=None,
             components=(0, 1),
             marker_subgroups=None,
             ax=None,
             colour_map=None,
             marker_map=None,
             **kwargs):
    """
    Scatter plot of two principal components, coloured and/or marked by subgroup.

    :param dat: Data to project. It is transposed before being passed to the PCA object, so it is presumably
    laid out as features (rows) x samples (columns) - TODO confirm against callers.
    :param colour_subgroups: Passed to scatter.scatter_with_colour_and_markers as `colour_subgroups`.
    :param p: Optional PCA object. If supplied, it is used as-is to transform `dat` (no refit). If None, a new
    pca.PCA() is created and fitted to `dat`.
    :param components: Indexable holding the two (zero-based) component indices plotted on the x and y axes.
    :param marker_subgroups: Passed to the scatter helper as `marker_subgroups`.
    :param ax: Passed to the scatter helper as `ax`.
    :param colour_map: Passed to the scatter helper as `colour_map`.
    :param marker_map: Passed to the scatter helper as `marker_map`.
    :param kwargs: Passed through to the scatter helper unchanged.
    :return: Tuple (p, ax): the PCA object used and the axes returned by the scatter helper.
    """
    if p is None:
        p = pca.PCA()
        pca_data = p.fit_transform(dat.transpose())
    else:
        pca_data = p.transform(dat.transpose())
    # Convert the explained variance ratio to a percentage for the axis labels.
    variance_explained = p.explained_variance_ratio_ * 100.

    ax = scatter.scatter_with_colour_and_markers(
        pca_data[:, components],
        colour_subgroups=colour_subgroups,
        colour_map=colour_map,
        marker_subgroups=marker_subgroups,
        marker_map=marker_map,
        ax=ax,
        **kwargs)
    # Update: from Seaborn v 0.9.0, the legend appears with a frame by default, but this doesn't look good.
    # get_legend() returns None when the axes has no legend, so only touch the frame if one exists.
    leg = ax.get_legend()
    if leg is not None:
        leg.set_frame_on(False)

    # Axis labels use one-based component numbering.
    ax.set_xlabel("PCA component %s (%.1f%%)" %
                  (components[0] + 1, variance_explained[components[0]]))
    ax.set_ylabel("PCA component %s (%.1f%%)" %
                  (components[1] + 1, variance_explained[components[1]]))

    return p, ax
Exemple #3
0
    def compare_two_gene_levels(
        dat_two_cols,
        meta,
        legend_dict,
        colour_map=scatter_colours,
        marker_map=scatter_markers,
    ):
        """
        Scatter the first column of `dat_two_cols` against the second, styled by patient and type.

        :param dat_two_cols: Data passed to the scatter helper; its first two `columns` entries name the axes.
        :param meta: Metadata object providing `patient_id` (colour grouping) and `type` (marker grouping).
        :param legend_dict: Passed to common.add_custom_legend to build the legend.
        :param colour_map: Passed to the scatter helper as `colour_map`.
        :param marker_map: Passed to the scatter helper as `marker_map`.
        :return: Tuple (fig, ax): the figure owning the axes, and the axes themselves.
        """
        styling = dict(
            colour_subgroups=meta.patient_id,
            colour_map=colour_map,
            marker_subgroups=meta.type,
            marker_map=marker_map,
        )
        ax = scatter.scatter_with_colour_and_markers(dat_two_cols, **styling)
        common.add_custom_legend(ax, legend_dict, loc_outside=True)

        # Label each axis with the corresponding column name.
        label_template = '%s (logTPM)'
        column_names = dat_two_cols.columns
        ax.set_xlabel(label_template % column_names[0])
        ax.set_ylabel(label_template % column_names[1])

        # Tighten the layout first, then shrink the plot area on the right-hand side
        # (presumably to leave room for the legend placed outside the axes).
        figure = ax.figure
        figure.tight_layout()
        figure.subplots_adjust(right=0.8)

        return figure, ax
Exemple #4
0
    # Ordered mapping from group label to colour. NOTE(review): cmap is not used in the visible part of
    # this snippet (the scatter call below passes colour_map=None) - confirm whether it is needed.
    cmap = collections.OrderedDict([('fl/fl', 'k'),
                                    (r'$\Delta$/$\Delta$', 'w')])

    # Reuse the colour grouping (defined outside this snippet) as the marker grouping.
    marker_subgroups = colour_subgroups
    # Ordered mapping from group label to marker style.
    mmap = collections.OrderedDict([('fl/fl', 'o'),
                                    (r'$\Delta$/$\Delta$', 's')])

    # Fit a PCA to the transposed data and express the explained variance ratio as a percentage.
    p = PCA()
    pc_dat = p.fit_transform(log_dat.transpose())
    variance_explained = p.explained_variance_ratio_ * 100.

    # Plot the first two components. No colour grouping is passed, so groups are distinguished by
    # marker only; presumably every point is drawn in default_colour - verify against the scatter helper.
    ax = scatter.scatter_with_colour_and_markers(
        pc_dat[:, [0, 1]],
        colour_subgroups=None,
        colour_map=None,
        marker_subgroups=marker_subgroups,
        marker_map=mmap,
        default_colour='k',
        ms=50)
    # Axis labels show the (one-based) component number and its percentage of variance explained.
    ax.set_xlabel("PCA component %s (%.1f%%)" % (1, variance_explained[0]),
                  fontsize=fontsize)
    ax.set_ylabel("PCA component %s (%.1f%%)" % (2, variance_explained[1]),
                  fontsize=fontsize)

    # Build the legend, give its frame a black edge, and apply the shared font size to the legend
    # text and to the tick labels on both axes.
    leg = ax.legend()
    fr = leg.get_frame()
    fr.set_edgecolor('k')
    plt.setp(leg.get_texts(), fontsize=fontsize)
    plt.setp(ax.xaxis.get_ticklabels(), fontsize=fontsize)
    plt.setp(ax.yaxis.get_ticklabels(), fontsize=fontsize)
Exemple #5
0
                                   "all_samples_pca_019_only_annotated.png"),
                      dpi=200)

    # Again, again! But this time we compute the PCs with only 019 lines, then transform the remaining lines
    # using the same PCs.
    # NOTE(review): only the fit on mdat_019 is visible in this snippet; the transform of the remaining
    # lines presumably happens further down.

    # Fit the PCA to the transposed 019-only data; convert the explained variance ratio to a percentage.
    p = PCA()
    pca_data = p.fit_transform(mdat_019.transpose())
    variance_explained = p.explained_variance_ratio_ * 100.

    fig = plt.figure(figsize=(6.4, 4.8))
    ax = fig.add_subplot(111)
    # Plot the first two components, coloured by 'patient_id' and marked by 'descriptor', both looked
    # up in obj.meta for the columns of mdat_019.
    ax = scatter.scatter_with_colour_and_markers(
        pca_data[:, [0, 1]],
        colour_subgroups=obj.meta.loc[mdat_019.columns, 'patient_id'],
        colour_map=cmap,
        marker_subgroups=obj.meta.loc[mdat_019.columns, 'descriptor'],
        marker_map=mmap,
        ax=ax,
    )

    ax.set_xlabel("PCA component %s (%.1f%%)" % (1, variance_explained[0]))
    ax.set_ylabel("PCA component %s (%.1f%%)" % (2, variance_explained[1]))
    # Remove the legend frame.
    # NOTE(review): get_legend() returns None if the axes has no legend, which would make the next line fail.
    leg = ax.get_legend()
    leg.set_frame_on(False)
    # Adjust the subplot margins; the reduced right edge presumably leaves room for the legend.
    fig.subplots_adjust(left=0.12, right=0.75, top=0.98)

    # Save the plot before any annotations are added.
    fig.savefig(os.path.join(outdir, "pca_019_only.png"), dpi=200)

    # Label each point with its column name, positioned at its first two PC coordinates.
    # These annotations are added after the save above, so they are not part of "pca_019_only.png".
    for i, col in enumerate(mdat_019.columns):
        this_coords = pca_data[i, :2]
        ax.annotate(col, this_coords)