def manifold_plot(man, fpkmMatrix, samples, standardize=3, log=True,
                  show_text=False, sep='_', legend_loc='best', legend_size=14):
    """Embed the samples with a manifold learner and show a 2-D scatter plot.

    Parameters
    ----------
    man : object
        Instance of a manifold algorithm. Only ``man.fit_transform`` is
        called, on the transposed, preprocessed matrix.
    fpkmMatrix : array-like
        Expression matrix. Presumably genes x samples, since it is
        transposed before fitting -- confirm against callers.
    samples : sequence of str
        One label per sample. The text before the first ``sep`` is the
        condition used for colouring and for the legend.
    standardize : int
        2 -> z-score along axis 1 (rows/genes); 1 -> z-score along axis 0
        (cols/samples); any other value (the default is 3) skips it.
    log : bool
        Apply ``log10(x + 1)`` before standardizing.
    show_text : bool
        When true the markers are hidden and the full sample label is drawn
        at each point instead.
    sep : str
        Separator between the condition and the rest of a sample label.
    legend_loc, legend_size
        Forwarded to ``ax.legend`` (``loc`` and ``prop={'size': ...}``).

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    ## preprocessing of the fpkmMatrix
    if log:
        fpkmMatrix = np.log10(fpkmMatrix + 1.)
    if standardize == 2:  # standardize along rows/genes
        fpkmMatrix = zscore(fpkmMatrix, axis=1)
    elif standardize == 1:  # standardize along cols/samples
        fpkmMatrix = zscore(fpkmMatrix, axis=0)

    ## remove genes with NaNs (e.g. constant rows after the z-score), the
    ## same way PCA_plot does, so the estimator is not handed NaN values
    fpkmMatrix = fpkmMatrix[~np.isnan(np.sum(fpkmMatrix, axis=1))]

    embedding = man.fit_transform(fpkmMatrix.T)

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)

    # Number the conditions in order of first appearance. Unlike
    # list(set(...)) this gives the same colour assignment on every run.
    condition_index = {}
    for sample in samples:
        condition_index.setdefault(sample.split(sep)[0], len(condition_index))

    n_colors = len(COLORS10)
    scatter_proxies = []
    labels_show = []
    seen = set()
    for row, label in zip(embedding, samples):
        condition = label.split(sep)[0]
        # Cycle through the palette so more than len(COLORS10) conditions
        # reuse colours instead of raising IndexError.
        color = COLORS10[condition_index[condition] % n_colors]
        ax.scatter(row[0], row[1], label='label', color=color,
                   visible=not show_text, s=50, marker='o')
        if condition not in seen:
            # one legend entry (proxy artist) per condition
            seen.add(condition)
            labels_show.append(condition)
            scatter_proxies.append(
                Line2D([0], [0], ls="none", c=color, marker='o'))
        if show_text:
            ax.text(row[0], row[1], label,
                    ha='center', va='center', rotation=0,
                    color=color, size='large')

    ax.legend(scatter_proxies, labels_show, numpoints=1, frameon=True,
              loc=legend_loc, prop={'size': legend_size})
    ax.set_xlabel('M1', fontsize=20)
    ax.set_ylabel('M2', fontsize=20)
    enlarge_tick_fontsize(ax, 14)
    fig.tight_layout()
    plt.show()
def PCA_plot(fpkmMatrix, samples, standardize=3, log=True, show_text=False,
             sep='_', legend_loc='best', legend_size=14):
    """Run PCA on the expression matrix and show a PC1/PC2 scatter plot.

    Parameters
    ----------
    fpkmMatrix : array-like
        Expression matrix. Presumably genes x samples, since it is
        transposed before fitting -- confirm against callers.
    samples : sequence of str
        One label per sample. The text before the first ``sep`` is the
        condition used for colouring and for the legend.
    standardize : int
        Whether to z-score the (log10 transformed) FPKM values:
        2 -> along axis 1 (rows/genes); 1 -> along axis 0 (cols/samples);
        any other value (the default is 3) skips it.
    log : bool
        Apply ``log10(x + 1)`` before standardizing.
    show_text : bool
        When true, the second ``sep``-delimited field of each label is drawn
        slightly below its point.
    sep : str
        Separator between the fields of a sample label.
    legend_loc, legend_size
        Forwarded to ``ax.legend`` (``loc`` and ``prop={'size': ...}``).

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    ## preprocessing of the fpkmMatrix
    if log:
        fpkmMatrix = np.log10(fpkmMatrix + 1.)
    if standardize == 2:  # standardize along rows/genes
        fpkmMatrix = zscore(fpkmMatrix, axis=1)
    elif standardize == 1:  # standardize along cols/samples
        fpkmMatrix = zscore(fpkmMatrix, axis=0)

    ## remove genes with NaNs
    fpkmMatrix = fpkmMatrix[~np.isnan(np.sum(fpkmMatrix, axis=1))]

    ## fit once; the same model supplies both the plotted coordinates and
    ## the variance captured (the original fitted a second, 2-component PCA)
    pca = PCA(n_components=None)
    pca_transformed = pca.fit_transform(fpkmMatrix.T)[:, :2]
    # Multiply into a new array so the estimator's own attribute is not
    # modified through the slice view.
    variance_explained = pca.explained_variance_ratio_[0:3] * 100

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)

    # Number the conditions in order of first appearance. Unlike
    # list(set(...)) this gives the same colour assignment on every run.
    condition_index = {}
    for sample in samples:
        condition_index.setdefault(sample.split(sep)[0], len(condition_index))

    n_conditions = len(condition_index)
    if n_conditions > 20:
        # more conditions than the largest palette: draw random RGB colours
        colors = ['#%02X%02X%02X' % (random.randint(0, 255),
                                     random.randint(0, 255),
                                     random.randint(0, 255))
                  for _ in range(n_conditions)]
    elif n_conditions > 10:
        colors = COLORS20
    else:
        colors = COLORS10

    scatter_proxies = []
    labels_show = []
    seen = set()
    for row, label in zip(pca_transformed, samples):
        fields = label.split(sep)
        condition = fields[0]
        color = colors[condition_index[condition]]
        ax.scatter(row[0], row[1], label='label', color=color,
                   s=50, marker='o')
        if condition not in seen:
            # one legend entry (proxy artist) per condition
            seen.add(condition)
            labels_show.append(condition)
            scatter_proxies.append(
                Line2D([0], [0], ls="none", c=color, marker='o'))
        if show_text:
            # Fall back to the whole label when it has no second field
            # rather than raising IndexError.
            text = fields[1] if len(fields) > 1 else label
            # the text sits 2 data units below the marker
            ax.text(row[0], row[1] - 2, text,
                    ha='center', va='center', rotation=0,
                    color=color, size='large')

    ax.legend(scatter_proxies, labels_show, numpoints=1, frameon=True,
              loc=legend_loc, prop={'size': legend_size})
    ax.set_xlabel('PC1 (%.2f%% variance captured)' % variance_explained[0],
                  fontsize=20)
    ax.set_ylabel('PC2 (%.2f%% variance captured)' % variance_explained[1],
                  fontsize=20)
    enlarge_tick_fontsize(ax, 14)
    fig.tight_layout()
    plt.show()
def manifold_plot(man, fpkmMatrix, samples, standardize=3, log=True,
                  show_text=False, sep='_', legend_loc='best', legend_size=14):
    """Embed the samples with a manifold learner and show a 2-D scatter plot.

    Parameters
    ----------
    man : object
        Instance of a manifold algorithm. Only ``man.fit_transform`` is
        called, on the transposed, preprocessed matrix.
    fpkmMatrix : array-like
        Expression matrix. Presumably genes x samples, since it is
        transposed before fitting -- confirm against callers.
    samples : sequence of str
        One label per sample. The text before the first ``sep`` is the
        condition used for colouring and for the legend.
    standardize : int
        2 -> z-score along axis 1 (rows/genes); 1 -> z-score along axis 0
        (cols/samples); any other value (the default is 3) skips it.
    log : bool
        Apply ``log10(x + 1)`` before standardizing.
    show_text : bool
        When true the markers are hidden and the full sample label is drawn
        at each point instead.
    sep : str
        Separator between the condition and the rest of a sample label.
    legend_loc, legend_size
        Forwarded to ``ax.legend`` (``loc`` and ``prop={'size': ...}``).

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    ## preprocessing of the fpkmMatrix
    if log:
        fpkmMatrix = np.log10(fpkmMatrix + 1.)
    if standardize == 2:  # standardize along rows/genes
        fpkmMatrix = zscore(fpkmMatrix, axis=1)
    elif standardize == 1:  # standardize along cols/samples
        fpkmMatrix = zscore(fpkmMatrix, axis=0)

    ## remove genes with NaNs (e.g. constant rows after the z-score), the
    ## same way PCA_plot does, so the estimator is not handed NaN values
    fpkmMatrix = fpkmMatrix[~np.isnan(np.sum(fpkmMatrix, axis=1))]

    points = man.fit_transform(fpkmMatrix.T)

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)

    # Map each condition to a palette slot in order of first appearance, so
    # the colours no longer depend on set iteration order.
    slot_of = {}
    for sample in samples:
        slot_of.setdefault(sample.split(sep)[0], len(slot_of))

    palette_size = len(COLORS10)
    scatter_proxies = []
    labels_show = []
    in_legend = set()
    for row, label in zip(points, samples):
        condition = label.split(sep)[0]
        # Wrap around the palette: with more than len(COLORS10) conditions
        # colours repeat instead of raising IndexError.
        color = COLORS10[slot_of[condition] % palette_size]
        ax.scatter(row[0], row[1], label='label', color=color,
                   visible=not show_text, s=50, marker='o')
        if condition not in in_legend:
            # one legend entry (proxy artist) per condition
            in_legend.add(condition)
            labels_show.append(condition)
            scatter_proxies.append(
                Line2D([0], [0], ls="none", c=color, marker='o'))
        if show_text:
            ax.text(row[0], row[1], label,
                    ha='center', va='center', rotation=0,
                    color=color, size='large')

    ax.legend(scatter_proxies, labels_show, numpoints=1, frameon=True,
              loc=legend_loc, prop={'size': legend_size})
    ax.set_xlabel('M1', fontsize=20)
    ax.set_ylabel('M2', fontsize=20)
    enlarge_tick_fontsize(ax, 14)
    fig.tight_layout()
    plt.show()
def PCA_plot(fpkmMatrix, samples, standardize=3, log=True, show_text=False,
             sep='_', legend_loc='best', legend_size=14):
    """Run PCA via ``perform_PCA`` and show a PC1/PC2 scatter plot.

    Parameters
    ----------
    fpkmMatrix : array-like
        Expression matrix, passed unchanged to ``perform_PCA``.
    samples : sequence of str
        One label per sample. The text before the first ``sep`` is the
        condition used for colouring and for the legend.
    standardize, log
        Preprocessing options forwarded to ``perform_PCA``
        (``standardize``: whether to z-score the log10 transformed FPKM).
    show_text : bool
        When true, the second ``sep``-delimited field of each label is drawn
        slightly below its point.
    sep : str
        Separator between the fields of a sample label.
    legend_loc, legend_size
        Forwarded to ``ax.legend`` (``loc`` and ``prop={'size': ...}``).

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    ## perform PCA
    variance_explained, pca_transformed = perform_PCA(
        fpkmMatrix, standardize=standardize, log=log)

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)

    # Number the conditions in order of first appearance. Unlike
    # list(set(...)) this gives the same colour assignment on every run.
    condition_index = {}
    for sample in samples:
        condition_index.setdefault(sample.split(sep)[0], len(condition_index))

    n_conditions = len(condition_index)
    if n_conditions > 20:
        # more conditions than the largest palette: draw random RGB colours
        colors = ['#%02X%02X%02X' % (random.randint(0, 255),
                                     random.randint(0, 255),
                                     random.randint(0, 255))
                  for _ in range(n_conditions)]
    elif n_conditions > 10:
        colors = COLORS20
    else:
        colors = COLORS10

    scatter_proxies = []
    labels_show = []
    seen = set()
    for row, label in zip(pca_transformed, samples):
        fields = label.split(sep)
        condition = fields[0]
        color = colors[condition_index[condition]]
        ax.scatter(row[0], row[1], label='label', color=color,
                   s=50, marker='o')
        if condition not in seen:
            # one legend entry (proxy artist) per condition
            seen.add(condition)
            labels_show.append(condition)
            scatter_proxies.append(
                Line2D([0], [0], ls="none", c=color, marker='o'))
        if show_text:
            # Fall back to the whole label when it has no second field
            # rather than raising IndexError.
            text = fields[1] if len(fields) > 1 else label
            # the text sits 2 data units below the marker
            ax.text(row[0], row[1] - 2, text,
                    ha='center', va='center', rotation=0,
                    color=color, size='large')

    ax.legend(scatter_proxies, labels_show, numpoints=1, frameon=True,
              loc=legend_loc, prop={'size': legend_size})
    ax.set_xlabel('PC1 (%.2f%% variance captured)' % variance_explained[0],
                  fontsize=20)
    ax.set_ylabel('PC2 (%.2f%% variance captured)' % variance_explained[1],
                  fontsize=20)
    enlarge_tick_fontsize(ax, 14)
    fig.tight_layout()
    plt.show()
def PCA_plot2(fpkmMatrix, color_by, shape_by, standardize=3, log=True,
              legend_loc='best', legend_size=14):
    """Build a PC1/PC2 scatter plot coloured and shaped by two label lists.

    Parameters
    ----------
    fpkmMatrix : array-like
        Expression matrix, passed unchanged to ``perform_PCA``.
    color_by : sequence of hashable
        One label per sample; each distinct label gets its own colour.
    shape_by : sequence of hashable
        One label per sample; each distinct label gets its own marker.
    standardize, log
        Preprocessing options forwarded to ``perform_PCA``.
    legend_loc, legend_size
        Forwarded to ``ax.legend`` (``loc`` and ``prop={'size': ...}``).

    Returns
    -------
    The matplotlib figure. It is not shown here.
    """
    variance_explained, pca_transformed = perform_PCA(
        fpkmMatrix, standardize=standardize, log=log)

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)

    # Number the distinct labels in order of first appearance. Unlike
    # list(set(...)) this keeps colours and markers stable across runs and
    # only needs the labels to be hashable.
    color_index = {}
    for value in color_by:
        color_index.setdefault(value, len(color_index))
    shape_index = {}
    for value in shape_by:
        shape_index.setdefault(value, len(shape_index))

    n_colors = len(color_index)
    if n_colors > 20:
        # more labels than the largest palette: draw random RGB colours
        colors = ['#%02X%02X%02X' % (random.randint(0, 255),
                                     random.randint(0, 255),
                                     random.randint(0, 255))
                  for _ in range(n_colors)]
    elif n_colors > 10:
        colors = COLORS20
    else:
        colors = COLORS10

    shapes = 'osv^phd'
    n_shapes = len(shapes)

    # for legend and scatter proxies
    scatter_proxies = []
    labels_show = []
    seen = set()
    for row, label_c, label_s in zip(pca_transformed, color_by, shape_by):
        color = colors[color_index[label_c]]
        # Cycle through the markers so more than len(shapes) distinct
        # shape labels reuse markers instead of raising IndexError.
        marker = shapes[shape_index[label_s] % n_shapes]
        ax.scatter(row[0], row[1], label='label', color=color,
                   s=50, marker=marker)
        label = '%s-%s' % (label_c, label_s)
        if label not in seen:
            # one legend entry (proxy artist) per colour/shape combination
            seen.add(label)
            labels_show.append(label)
            scatter_proxies.append(
                Line2D([0], [0], ls="none", c=color, marker=marker))

    ax.legend(scatter_proxies, labels_show, numpoints=1, frameon=True,
              loc=legend_loc, prop={'size': legend_size})
    ax.set_xlabel('PC1 (%.2f%% variance captured)' % variance_explained[0],
                  fontsize=20)
    ax.set_ylabel('PC2 (%.2f%% variance captured)' % variance_explained[1],
                  fontsize=20)
    enlarge_tick_fontsize(ax, 14)
    fig.tight_layout()
    return fig
def PCA_plot2(fpkmMatrix, color_by, shape_by, standardize=3, log=True,
              legend_loc='best', legend_size=14):
    """Show a PC1/PC2 scatter plot coloured and shaped by two label lists.

    Parameters
    ----------
    fpkmMatrix : array-like
        Expression matrix, passed unchanged to ``perform_PCA``.
    color_by : sequence of hashable
        One label per sample; each distinct label gets its own colour.
    shape_by : sequence of hashable
        One label per sample; each distinct label gets its own marker.
    standardize, log
        Preprocessing options forwarded to ``perform_PCA``.
    legend_loc, legend_size
        Forwarded to ``ax.legend`` (``loc`` and ``prop={'size': ...}``).

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    variance_explained, pca_transformed = perform_PCA(
        fpkmMatrix, standardize=standardize, log=log)

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)

    # Number the distinct labels in order of first appearance. Unlike
    # list(set(...)) this keeps colours and markers stable across runs and
    # only needs the labels to be hashable.
    color_index = {}
    for value in color_by:
        color_index.setdefault(value, len(color_index))
    shape_index = {}
    for value in shape_by:
        shape_index.setdefault(value, len(shape_index))

    n_colors = len(color_index)
    if n_colors > 20:
        # more labels than the largest palette: draw random RGB colours
        colors = ['#%02X%02X%02X' % (random.randint(0, 255),
                                     random.randint(0, 255),
                                     random.randint(0, 255))
                  for _ in range(n_colors)]
    elif n_colors > 10:
        colors = COLORS20
    else:
        colors = COLORS10

    shapes = 'osv^phd'
    n_shapes = len(shapes)

    # for legend and scatter proxies
    scatter_proxies = []
    labels_show = []
    seen = set()
    for row, label_c, label_s in zip(pca_transformed, color_by, shape_by):
        color = colors[color_index[label_c]]
        # Cycle through the markers so more than len(shapes) distinct
        # shape labels reuse markers instead of raising IndexError.
        marker = shapes[shape_index[label_s] % n_shapes]
        ax.scatter(row[0], row[1], label='label', color=color,
                   s=50, marker=marker)
        label = '%s-%s' % (label_c, label_s)
        if label not in seen:
            # one legend entry (proxy artist) per colour/shape combination
            seen.add(label)
            labels_show.append(label)
            scatter_proxies.append(
                Line2D([0], [0], ls="none", c=color, marker=marker))

    ax.legend(scatter_proxies, labels_show, numpoints=1, frameon=True,
              loc=legend_loc, prop={'size': legend_size})
    ax.set_xlabel('PC1 (%.2f%% variance captured)' % variance_explained[0],
                  fontsize=20)
    ax.set_ylabel('PC2 (%.2f%% variance captured)' % variance_explained[1],
                  fontsize=20)
    enlarge_tick_fontsize(ax, 14)
    fig.tight_layout()
    plt.show()
def PCA_plot(fpkmMatrix, samples, standardize=3, log=True, show_text=False,
             sep='_', legend_loc='best', legend_size=14):
    """Run PCA on the expression matrix and show a PC1/PC2 scatter plot.

    Parameters
    ----------
    fpkmMatrix : array-like
        Expression matrix. Presumably genes x samples, since it is
        transposed before fitting -- confirm against callers.
    samples : sequence of str
        One label per sample. The text before the first ``sep`` is the
        condition used for colouring and for the legend.
    standardize : int
        Whether to z-score the (log10 transformed) FPKM values:
        2 -> along axis 1 (rows/genes); 1 -> along axis 0 (cols/samples);
        any other value (the default is 3) skips it.
    log : bool
        Apply ``log10(x + 1)`` before standardizing.
    show_text : bool
        When true, the second ``sep``-delimited field of each label is drawn
        slightly below its point.
    sep : str
        Separator between the fields of a sample label.
    legend_loc, legend_size
        Forwarded to ``ax.legend`` (``loc`` and ``prop={'size': ...}``).

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    ## preprocessing of the fpkmMatrix
    if log:
        fpkmMatrix = np.log10(fpkmMatrix + 1.)
    if standardize == 2:  # standardize along rows/genes
        fpkmMatrix = zscore(fpkmMatrix, axis=1)
    elif standardize == 1:  # standardize along cols/samples
        fpkmMatrix = zscore(fpkmMatrix, axis=0)

    ## remove genes with NaNs
    keep = ~np.isnan(np.sum(fpkmMatrix, axis=1))
    fpkmMatrix = fpkmMatrix[keep]

    ## fit once; the same model supplies both the plotted coordinates and
    ## the variance captured (the original fitted a second, 2-component PCA)
    pca = PCA(n_components=None)
    pca_transformed = pca.fit_transform(fpkmMatrix.T)[:, :2]
    # Multiply into a new array so the estimator's own attribute is not
    # modified through the slice view.
    variance_explained = pca.explained_variance_ratio_[0:3] * 100

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)

    # Map each condition to a palette slot in order of first appearance, so
    # the colours no longer depend on set iteration order.
    slot_of = {}
    for sample in samples:
        slot_of.setdefault(sample.split(sep)[0], len(slot_of))

    n_conditions = len(slot_of)
    if n_conditions > 20:
        # more conditions than the largest palette: draw random RGB colours
        colors = ['#%02X%02X%02X' % (random.randint(0, 255),
                                     random.randint(0, 255),
                                     random.randint(0, 255))
                  for _ in range(n_conditions)]
    elif n_conditions > 10:
        colors = COLORS20
    else:
        colors = COLORS10

    scatter_proxies = []
    labels_show = []
    in_legend = set()
    for row, label in zip(pca_transformed, samples):
        fields = label.split(sep)
        condition = fields[0]
        color = colors[slot_of[condition]]
        ax.scatter(row[0], row[1], label='label', color=color,
                   s=50, marker='o')
        if condition not in in_legend:
            # one legend entry (proxy artist) per condition
            in_legend.add(condition)
            labels_show.append(condition)
            scatter_proxies.append(
                Line2D([0], [0], ls="none", c=color, marker='o'))
        if show_text:
            # Fall back to the whole label when it has no second field
            # rather than raising IndexError.
            text = fields[1] if len(fields) > 1 else label
            # the text sits 2 data units below the marker
            ax.text(row[0], row[1] - 2, text,
                    ha='center', va='center', rotation=0,
                    color=color, size='large')

    ax.legend(scatter_proxies, labels_show, numpoints=1, frameon=True,
              loc=legend_loc, prop={'size': legend_size})
    ax.set_xlabel('PC1 (%.2f%% variance captured)' % variance_explained[0],
                  fontsize=20)
    ax.set_ylabel('PC2 (%.2f%% variance captured)' % variance_explained[1],
                  fontsize=20)
    enlarge_tick_fontsize(ax, 14)
    fig.tight_layout()
    plt.show()