linkage_method='complete', key_added='dendrogram_louvain') sc.tl.rank_genes_groups(adata, 'louvain', method='wilcoxon', n_genes=50, use_raw=True) sc.tl.filter_rank_genes_groups(adata, groupby='louvain', use_raw=True, log=True, key_added='rank_genes_groups_filtered', min_in_group_fraction=0.25, min_fold_change=1.25, max_out_group_fraction=0.25) sc.pl.rank_genes_groups_dotplot(adata, key='rank_genes_groups_filtered', groupby='louvain', mean_only_expressed=True, n_genes=6, save='_markerDotPlots.png', show=False, color_map=my_dot_cmap, dendrogram=True) mjc.write_marker_file(adata) adata.write('./data/Processed.concatenated.anndata.h5ad')
if gene in expressed_dict: genes_to_plot.append(gene) else: print('Sorry,', gene, 'Is not expressed in this dataset or is invariable.\n') print('Found cells expressing', ' '.join(genes_to_plot), '\n') if run_marker_analysis: print("\nAll done with general workflow... now finding marker genes.\n") ## Find marker genes via Wilxocon test based on Louvain cluster assignment # Create a simple plot to show the top 25 most significant markers for each cluster sc.tl.rank_genes_groups(adata, 'louvain', method='wilcoxon') mjc.write_marker_file(adata, file_out=''.join([figure_dir, '/marker_output.csv'])) sc.tl.filter_rank_genes_groups(adata, groupby='louvain', use_raw=True, log=True, key_added='rank_genes_groups_filtered', min_in_group_fraction=0.5, min_fold_change=2, max_out_group_fraction=0.5) sc.pl.rank_genes_groups(adata, key='rank_genes_groups_filtered', n_genes=30, sharey=False, save = '_markerPlots.pdf', show = False) sc.pl.rank_genes_groups_dotplot(adata, key='rank_genes_groups_filtered', n_genes=6, save = '_markerDotPlots.pdf', color_map=my_dot_cmap, show = False, mean_only_expressed=True, dot_min=0.2, dot_max=1, standard_scale='var') print('\nDone with entire script execution')
def _select_expressed_genes(candidates, expressed):
    """Return the genes from *candidates* that are present in *expressed*.

    Prints one notice per gene that is dropped, followed by a summary line
    listing the genes that were kept. The order of *candidates* is preserved.
    """
    genes_to_plot = []
    for gene in candidates:
        if gene in expressed:
            genes_to_plot.append(gene)
        else:
            print('Sorry,', gene, 'Is not expressed in this dataset or is invariable.\n')
    print('Plotting genes:', ', '.join(genes_to_plot), '\n')
    return genes_to_plot


def _select_expressed_gene_groups(groups, expressed):
    """Filter labelled gene groups and compute their bracket positions.

    *groups* is a sequence of ``(label, genes)`` pairs. Returns a tuple
    ``(genes_to_plot, positions, labels)`` where *positions* holds inclusive
    ``(start, end)`` indices into *genes_to_plot* for every group that still
    has at least one gene, and *labels* holds the matching group labels.

    Computing the positions after filtering keeps each bracket aligned with
    its genes even when some genes are absent from the dataset; with every
    gene present the result equals the positions implied by the group sizes.
    """
    genes_to_plot = []
    positions = []
    labels = []
    for label, members in groups:
        first = len(genes_to_plot)
        for gene in members:
            if gene in expressed:
                genes_to_plot.append(gene)
            else:
                print('Sorry,', gene, 'Is not expressed in this dataset or is invariable.\n')
        # Groups that lost all of their genes get no bracket at all.
        if len(genes_to_plot) > first:
            positions.append((first, len(genes_to_plot) - 1))
            labels.append(label)
    print('Plotting genes:', ', '.join(genes_to_plot), '\n')
    return genes_to_plot, positions, labels


def _plot_umap_features(adata, genes, save):
    """Save a UMAP feature plot of *genes* (read from ``adata.raw``) under suffix *save*."""
    sc.pl.umap(adata, color=genes, save=save, show=False, cmap=my_feature_cmap,
               size=dot_size*3, use_raw=True)


def _plot_cluster_identity(plot_fn, adata):
    """Save the two cluster-identity figures for one embedding plot function.

    *plot_fn* is one of the scanpy embedding plotters (``sc.pl.umap``,
    ``sc.pl.tsne``, ``sc.pl.draw_graph``). The first figure colours by Louvain
    cluster with labels on the data; the second shows cluster and age with the
    legend in the right margin.
    """
    shared = dict(show=False, edges=False, edges_color='lightgrey', edges_width=0.01,
                  size=dot_size, palette=greatestPalette, alpha=0.95, legend_fontsize=6)
    plot_fn(adata, color='louvain', save='_clusterIdentity_noEdge.png',
            legend_loc='on data', **shared)
    plot_fn(adata, color=['louvain', 'age'], save='_clusterIdentity_age.png',
            legend_loc='right margin', **shared)


def process_adata(adata):
    """Run the clustering / embedding / figure-generation workflow on *adata*.

    Steps, in order: gene-set scoring, highly-variable-gene selection,
    regression and scaling, PCA, neighbour graph + BBKNN, Louvain clustering,
    PAGA, UMAP / tSNE / force-directed layouts, marker-gene ranking, and a
    series of feature plots and dot plots for fixed gene panels.

    The function relies on module-level configuration defined elsewhere in the
    file (gene lists, ``num_pcs_use``, ``louv_res``, ``dot_size``, colour maps,
    ``figure_dir``, ...) and on ``adata.raw`` being populated. Every figure is
    saved through scanpy's ``save=`` mechanism; nothing is shown interactively.

    Parameters
    ----------
    adata
        Annotated data matrix. The code reads the ``obs`` columns ``age``,
        ``sex``, ``sampleName``, ``n_genes``, ``n_counts``, ``percent_mito``,
        ``S_score`` and ``G2M_score``.

    Returns
    -------
    The processed object, restricted to the highly variable genes.
    """
    # Score each cell on two gene sets and compare the scores across ages.
    # NOTE(review): 'femaleness' is scored from `androgen_genes_only` -- confirm
    # that this gene set is the intended one for that label.
    sc.tl.score_genes(adata, y_chrom_genes_only, ctrl_size=50, gene_pool=None, n_bins=25,
                      score_name='maleness', random_state=0, copy=False, use_raw=False)
    sc.tl.score_genes(adata, androgen_genes_only, ctrl_size=50, gene_pool=None, n_bins=25,
                      score_name='femaleness', random_state=0, copy=False, use_raw=False)
    sc.pl.violin(adata, keys=['maleness', 'femaleness'], groupby='age',
                 save='_gender_plot.png', show=False, ax=None)

    ## Identify highly-variable genes based on dispersion relative to expression level.
    sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=6, min_disp=0.2)

    ## Filter the genes to remove non-variable genes since they are uninformative.
    ## Copy the subset so the in-place steps below modify a real object rather
    ## than a view of the caller's data.
    adata = adata[:, adata.var['highly_variable']].copy()

    ## Regress out total counts per cell and the 'S_score' / 'G2M_score' columns
    ## (presumably cell-cycle scores computed upstream -- confirm).
    sc.pp.regress_out(adata, ['n_counts', 'S_score', 'G2M_score'])

    ## Scale each gene to unit variance. Clip values exceeding standard deviation 10 to remove extreme outliers
    sc.pp.scale(adata, max_value=10)

    ## Run PCA to compute the default number of components
    sc.tl.pca(adata, svd_solver='arpack')

    ## Rank genes according to contributions to PCs.
    sc.pl.pca_loadings(adata, show=False, components=list(range(1, 21)),
                       save='_PCA-loadings.png')

    ## Draw the PCA elbow plot to determine which PCs to use
    sc.pl.pca_variance_ratio(adata, log=True, n_pcs=100, save='_elbowPlot.png', show=False)

    ## Compute nearest-neighbors
    sc.pp.neighbors(adata, n_neighbors=num_neighbors_use, n_pcs=num_pcs_use)

    ## Batch-correct the neighbour graph across samples (batch key is 'sampleName').
    # NOTE(review): bbknn builds its own neighbour graph, which presumably
    # supersedes the one from sc.pp.neighbors above -- confirm the earlier call
    # is still needed.
    bbknn.bbknn(adata, batch_key='sampleName', n_pcs=75, neighbors_within_batch=3, copy=False)

    ## Calculate cell clusters via Louvain algorithm
    sc.tl.louvain(adata, resolution=louv_res)

    # PAGA is computed and drawn (without saving) before UMAP / draw_graph,
    # both of which are initialised from it via init_pos='paga'.
    sc.tl.paga(adata, groups='louvain')
    sc.pl.paga(adata, color='louvain', save=False, show=False, threshold=threshold,
               node_size_scale=node_size_scale, node_size_power=0.9, layout=paga_layout)
    sc.tl.umap(adata, init_pos='paga', min_dist=umap_min_dist, maxiter=maxiter,
               spread=umap_spread, gamma=umap_gamma, random_state=random_state)

    ## Run tSNE algorithm
    sc.tl.tsne(adata, n_pcs=num_pcs_use)

    ## Run draw_graph to get a FA2 graph layout
    sc.tl.draw_graph(adata, layout='fa', init_pos='paga', scalingRatio=4.0)

    # --- Embedding overview figures -------------------------------------
    _plot_cluster_identity(sc.pl.umap, adata)

    metadata_style = dict(show=False, legend_loc='right margin', edges=False,
                          size=dot_size, palette=greatestPalette, alpha=0.95)
    sc.pl.umap(adata, color='age', save='_age.png', **metadata_style)
    sc.pl.umap(adata, color='sex', save='_sex.png', **metadata_style)
    sc.pl.umap(adata, color='sampleName', save='_sample.png', **metadata_style)
    sc.pl.umap(adata, color=['n_genes', 'n_counts', 'percent_mito'], save='_stats.png',
               show=False, edges=False, cmap=my_feature_cmap, size=dot_size+10)

    _plot_cluster_identity(sc.pl.tsne, adata)
    _plot_cluster_identity(sc.pl.draw_graph, adata)

    sc.pl.paga(adata, color='louvain', save=''.join(['_', paga_layout, '_page.png']),
               show=False, threshold=threshold, node_size_scale=node_size_scale,
               node_size_power=0.9, layout=paga_layout)

    # --- Marker genes (top 100 per cluster, unfiltered) -----------------
    sc.tl.rank_genes_groups(adata, 'louvain', method='wilcoxon', n_genes=100, use_raw=True)
    sc.pl.rank_genes_groups_dotplot(adata, key='rank_genes_groups', groupby='louvain',
                                    mean_only_expressed=True, n_genes=6,
                                    save='_markerDotPlots.png', show=False,
                                    color_map=my_dot_cmap, dendrogram=True)
    mjc.write_marker_file(adata, file_out=''.join([figure_dir, '/marker_output.csv']),
                          n_genes=100)

    # Gene names available in adata.raw; only these can be plotted with use_raw=True.
    expressed = {str(gene) for gene in adata.raw.var_names.values.tolist()}

    # --- Feature plots for the configured gene lists --------------------
    genes_to_plot = _select_expressed_genes(genes_of_interest, expressed)
    _plot_umap_features(adata, genes_to_plot, '_featureplots.png')

    genes_to_plot = _select_expressed_genes(epi_cell_type_genes, expressed)
    _plot_umap_features(adata, genes_to_plot, '_epi_cell_types_featureplots.png')

    genes_to_plot = _select_expressed_genes(emilys_list, expressed)
    _plot_umap_features(adata, genes_to_plot, '_emilysGenes_featureplots.png')

    genes_to_plot = _select_expressed_genes(fig_genes, expressed)
    _plot_umap_features(adata, genes_to_plot, '_fig2_featureplots.png')
    sc.pl.dotplot(adata, var_names=genes_to_plot, groupby='louvain', mean_only_expressed=True,
                  save='_figure2_DotPlots.png', standard_scale='var', show=False,
                  color_map=my_dot_cmap, dendrogram=False)

    # --- Figure 1D: grouped dot plot ------------------------------------
    # The bracket positions are derived from the genes that survive filtering,
    # so a missing gene can no longer shift a label onto the wrong genes.
    fig_1D_groups = [
        ('Fibroblasts', ['DCN', 'COL1A1', 'COL1A2']),
        ('Vasc. SMCs', ['RGS5', 'PDGFRB']),
        ('ICCs', ['ANO1', 'KIT']),
        ('SMCs', ['ACTA2', 'TAGLN']),
        ('Submucosal', ['PDGFRA', 'DLL1', 'F3', 'NPY', 'GPX3']),
    ]
    genes_to_plot, group_positions, group_labels = _select_expressed_gene_groups(
        fig_1D_groups, expressed)
    for fig_1D_file in ('_fig_1D.png', '_fig_1D.pdf'):
        sc.pl.dotplot(adata, genes_to_plot, color_map=my_feature_cmap, groupby='louvain',
                      var_group_positions=group_positions, var_group_labels=group_labels,
                      var_group_rotation=45, use_raw=True, log=True, dendrogram=True,
                      expression_cutoff=expression_cutoff, mean_only_expressed=True,
                      show=False, save=fig_1D_file)
    _plot_umap_features(adata, genes_to_plot, '_fig1D_featureplots.png')

    genes_to_plot = _select_expressed_genes(y_chrom_genes, expressed)
    _plot_umap_features(adata, genes_to_plot, '_y_chrom_featurePlots.png')

    # --- Marker genes (top 50 per cluster, filtered) --------------------
    # This second ranking replaces the 'rank_genes_groups' result computed above.
    sc.tl.rank_genes_groups(adata, 'louvain', method='wilcoxon', n_genes=50, use_raw=True)
    sc.tl.filter_rank_genes_groups(adata, groupby='louvain', use_raw=True, log=True,
                                   key_added='rank_genes_groups_filtered',
                                   min_in_group_fraction=0.25, min_fold_change=1.5,
                                   max_out_group_fraction=0.5)
    # NOTE(review): same save suffix as the unfiltered marker dot plot above, so
    # this figure presumably overwrites it -- confirm that is intended.
    sc.pl.rank_genes_groups_dotplot(adata, key='rank_genes_groups_filtered', groupby='louvain',
                                    mean_only_expressed=True, n_genes=10,
                                    save='_markerDotPlots.png', show=False,
                                    color_map=my_dot_cmap, dendrogram=True)

    # --- Figure 2A / 2B --------------------------------------------------
    fig_2A_genes = ['ACTA2', 'TAGLN', 'DLL1', 'F3', 'NPY', 'GPX3']
    genes_to_plot = _select_expressed_genes(fig_2A_genes, expressed)
    sc.pl.dotplot(adata, var_names=genes_to_plot, groupby='age', use_raw=True, log=True,
                  mean_only_expressed=True, expression_cutoff=1.0, save='_fig2A_DotPlot.png',
                  standard_scale='var', smallest_dot=0, show=False, color_map=my_dot_cmap,
                  dendrogram=False)
    sc.pl.matrixplot(adata, var_names=genes_to_plot, groupby='age', use_raw=True, log=False,
                     save='_fig2A_MatrixPlot.png', show=False)
    sc.pl.dotplot(adata, var_names=genes_to_plot, groupby='louvain', mean_only_expressed=True,
                  save='_fig2B_DotPlot.png', standard_scale='var', show=False,
                  color_map=my_dot_cmap, dendrogram=True)
    _plot_umap_features(adata, genes_to_plot, '_fig2B_featureplots.png')

    # --- Figure 3A -------------------------------------------------------
    fig_3A_genes = ['ACTA2', 'TAGLN', 'F3', 'NPY', 'GPX3', 'WNT2B', 'RSPO2', 'RSPO3',
                    'NOG', 'CHRD', 'EGF']
    genes_to_plot = _select_expressed_genes(fig_3A_genes, expressed)
    sc.pl.dotplot(adata, var_names=genes_to_plot, groupby='louvain', use_raw=True, log=False,
                  mean_only_expressed=True, save='_fig3A_DotPlot.png', standard_scale='var',
                  show=False, color_map=my_dot_cmap, dendrogram=True, dot_max=0.5)
    _plot_umap_features(adata, genes_to_plot, '_fig3A_featureplots.png')

    # --- Crypt SEC panel -------------------------------------------------
    # NOTE(review): 'CRY61' and 'CTCSC' may be misspelt gene symbols (e.g.
    # 'CYR61') -- confirm; unknown symbols are silently dropped by the filter.
    crypt_SEC_genes = ['F3', 'DLL1', 'COL15A1', 'NRG1', 'CH25H', 'MMP11', 'CXCR4', 'CPM',
                       'BMP3', 'IGFBP5', 'ADAMDEC1', 'CTGF', 'CRY61', 'IGFBP3', 'HHIP',
                       'EFEMP1', 'NPY', 'CTCSC', 'NBEAL1', 'EIF5A', 'RPSAP58']
    genes_to_plot = _select_expressed_genes(crypt_SEC_genes, expressed)
    sc.pl.dotplot(adata, var_names=genes_to_plot, groupby='louvain', use_raw=True, log=False,
                  mean_only_expressed=True, save='_crypt_SEC_DotPlot.png',
                  standard_scale='var', show=False, color_map=my_dot_cmap, dendrogram=True)
    _plot_umap_features(adata, genes_to_plot, '_crypt_SEC_featureplots.png')

    # --- Figure 3B: full panel (log + linear) and its five sub-panels ----
    fig_3B_style = dict(groupby='louvain', mean_only_expressed=True, standard_scale='var',
                        show=False, color_map=my_dot_cmap, dendrogram=True, dot_max=0.25)

    fig_3B_genes = ['LGR5', 'OLFM4', 'FABP2', 'SI', 'DPP4', 'F3', 'NPY', 'ACTA2', 'TAGLN',
                    'NRG1', 'NRG2', 'NRG3', 'NRG4', 'TGFA', 'HBEGF', 'AREG', 'BTC', 'EPGN',
                    'EREG', 'EGFR', 'ERBB2', 'ERBB3', 'ERBB4']
    genes_to_plot = _select_expressed_genes(fig_3B_genes, expressed)
    sc.pl.dotplot(adata, var_names=genes_to_plot, save='_fig3B_DotPlot_logScale.png',
                  log=True, **fig_3B_style)
    sc.pl.dotplot(adata, var_names=genes_to_plot, save='_fig3B_DotPlot_linearScale.png',
                  log=False, **fig_3B_style)

    fig_3B_subpanels = [
        ('_fig3B1_DotPlot_logScale.png', ['LGR5', 'OLFM4']),
        ('_fig3B2_DotPlot_logScale.png', ['FABP2', 'SI', 'DPP4']),
        ('_fig3B3_DotPlot_logScale.png', ['F3', 'NPY', 'ACTA2', 'TAGLN']),
        ('_fig3B4_DotPlot_logScale.png', ['NRG1', 'NRG2', 'NRG3', 'NRG4', 'TGFA', 'HBEGF',
                                          'AREG', 'BTC', 'EPGN', 'EREG']),
        ('_fig3B5_DotPlot_logScale.png', ['EGFR', 'ERBB2', 'ERBB3', 'ERBB4']),
    ]
    for panel_file, panel_genes in fig_3B_subpanels:
        genes_to_plot = _select_expressed_genes(panel_genes, expressed)
        sc.pl.dotplot(adata, var_names=genes_to_plot, save=panel_file, log=True,
                      **fig_3B_style)

    return adata