linkage_method='complete',
                 key_added='dendrogram_louvain')

# Rank marker genes for each Louvain cluster with the Wilcoxon test, keeping
# 50 genes per cluster and reading expression from adata.raw (use_raw=True).
sc.tl.rank_genes_groups(
    adata,
    'louvain',
    method='wilcoxon',
    n_genes=50,
    use_raw=True,
)

# Filter the ranked genes and store the result under the key
# 'rank_genes_groups_filtered'. Thresholds: in-group fraction >= 0.25,
# fold change >= 1.25, out-group fraction <= 0.25.
sc.tl.filter_rank_genes_groups(
    adata,
    groupby='louvain',
    use_raw=True,
    log=True,
    key_added='rank_genes_groups_filtered',
    min_in_group_fraction=0.25,
    min_fold_change=1.25,
    max_out_group_fraction=0.25,
)

# Dot plot of the first 6 filtered markers per cluster; the figure is saved
# with the '_markerDotPlots.png' suffix instead of being shown.
sc.pl.rank_genes_groups_dotplot(
    adata,
    key='rank_genes_groups_filtered',
    groupby='louvain',
    mean_only_expressed=True,
    n_genes=6,
    save='_markerDotPlots.png',
    show=False,
    color_map=my_dot_cmap,
    dendrogram=True,
)

# Hand the ranked results to the project helper (defined outside this section;
# presumably it writes the marker table to its default location).
mjc.write_marker_file(adata)

# Persist the annotated object, including the results computed above.
adata.write('./data/Processed.concatenated.anndata.h5ad')
	if gene in expressed_dict:
		genes_to_plot.append(gene)
	else:
		print('Sorry,', gene, 'Is not expressed in this dataset or is invariable.\n')

print('Found cells expressing', ' '.join(genes_to_plot), '\n')


if run_marker_analysis:
	print("\nAll done with general workflow... now finding marker genes.\n")

	# Rank marker genes per Louvain cluster with the Wilcoxon test.
	sc.tl.rank_genes_groups(
		adata,
		'louvain',
		method='wilcoxon',
	)

	# Pass the unfiltered ranking to the project helper together with a CSV
	# path inside figure_dir (the helper is defined outside this section).
	mjc.write_marker_file(adata, file_out=figure_dir + '/marker_output.csv')

	# Keep only markers meeting the thresholds below (in-group fraction >= 0.5,
	# fold change >= 2, out-group fraction <= 0.5); the filtered ranking is
	# stored under 'rank_genes_groups_filtered'.
	sc.tl.filter_rank_genes_groups(
		adata,
		groupby='louvain',
		use_raw=True,
		log=True,
		key_added='rank_genes_groups_filtered',
		min_in_group_fraction=0.5,
		min_fold_change=2,
		max_out_group_fraction=0.5,
	)

	# Per-cluster ranking panels showing up to 30 filtered markers each.
	sc.pl.rank_genes_groups(
		adata,
		key='rank_genes_groups_filtered',
		n_genes=30,
		sharey=False,
		save='_markerPlots.pdf',
		show=False,
	)

	# Dot plot of the first 6 filtered markers per cluster, scaled per gene.
	sc.pl.rank_genes_groups_dotplot(
		adata,
		key='rank_genes_groups_filtered',
		n_genes=6,
		save='_markerDotPlots.pdf',
		color_map=my_dot_cmap,
		show=False,
		mean_only_expressed=True,
		dot_min=0.2,
		dot_max=1,
		standard_scale='var',
	)


print('\nDone with entire script execution')



def _expressed_genes(candidates, expressed):
	"""Return the genes in *candidates* that are members of *expressed*, in order.

	A notice is printed for every gene that is dropped, followed by one summary
	line listing the genes that were kept. The message text matches what the
	script printed before each plot when this logic was written out inline.
	"""
	genes_to_plot = []
	for gene in candidates:
		if gene in expressed:
			genes_to_plot.append(gene)
		else:
			print('Sorry,', gene, 'Is not expressed in this dataset or is invariable.\n')
	print('Plotting genes:', ', '.join(genes_to_plot),'\n')
	return genes_to_plot


def _group_spans(groups, expressed):
	"""Return ``(positions, labels)`` for labelled gene groups after filtering.

	*groups* is a sequence of ``(label, genes)`` pairs listed in plotting order.
	Each position is an inclusive ``(start, end)`` index pair into the
	concatenation of the genes that are present in *expressed*. Groups with no
	surviving gene are omitted so that labels and positions stay aligned.
	"""
	positions = []
	labels = []
	start = 0
	for label, genes in groups:
		kept = sum(1 for gene in genes if gene in expressed)
		if kept:
			positions.append((start, start + kept - 1))
			labels.append(label)
		start += kept
	return positions, labels


def process_adata(adata):
	"""Run the clustering/embedding workflow on *adata* and save the figures.

	Steps, in order: score cells with two gene lists, select highly variable
	genes, regress out covariates, scale, PCA, neighbor graph (replaced by
	BBKNN), Louvain clustering, PAGA, UMAP / tSNE / ForceAtlas2 layouts, marker
	gene ranking, and a series of feature, dot and matrix plots.

	The function reads many settings that are not parameters (for example
	num_pcs_use, louv_res, dot_size, greatestPalette, my_feature_cmap,
	my_dot_cmap, figure_dir and several gene lists); they must be defined in
	the enclosing module scope before this is called.

	Parameters:
		adata: annotated data object. It is modified in place up to the
			highly-variable-gene selection; ``adata.raw`` must be set because
			gene availability and most plots read from it.

	Returns:
		The processed object, restricted to the highly variable genes. This is
		a different object from the one passed in.
	"""
	# Score every cell against the two module-level gene lists; the scores are
	# stored as the observations 'maleness' and 'femaleness'.
	score_kwargs = dict(ctrl_size=50, gene_pool=None, n_bins=25, random_state=0, copy=False, use_raw=False)
	sc.tl.score_genes(adata, y_chrom_genes_only, score_name='maleness', **score_kwargs)
	sc.tl.score_genes(adata, androgen_genes_only, score_name='femaleness', **score_kwargs)

	sc.pl.violin(adata, keys=['maleness', 'femaleness'], groupby='age', save='_gender_plot.png', show=False, ax=None)

	## Identify highly-variable genes based on dispersion relative to expression level.
	sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=6, min_disp=0.2)

	## Filter the genes to remove non-variable genes since they are uninformative.
	# Copy explicitly: the slice is otherwise a view, and the in-place steps
	# below would have to turn it into a real object implicitly.
	adata = adata[:, adata.var['highly_variable']].copy()

	## Regress out the effects of total counts per cell and the two cell-cycle scores.
	sc.pp.regress_out(adata, ['n_counts', 'S_score', 'G2M_score'])

	## Scale each gene to unit variance. Clip values exceeding standard deviation 10 to remove extreme outliers
	sc.pp.scale(adata, max_value=10)

	## Run PCA to compute the default number of components
	sc.tl.pca(adata, svd_solver='arpack')

	## Rank genes according to contributions to PCs (components 1 through 20).
	sc.pl.pca_loadings(adata, show=False, components=list(range(1, 21)), save='_PCA-loadings.png')

	## Draw the PCA elbow plot to determine which PCs to use
	sc.pl.pca_variance_ratio(adata, log=True, n_pcs=100, save='_elbowPlot.png', show=False)

	## Compute nearest-neighbors
	sc.pp.neighbors(adata, n_neighbors=num_neighbors_use, n_pcs=num_pcs_use)

	## Batch-balanced neighbors, using the 'sampleName' observation as the batch.
	# NOTE(review): this runs after sc.pp.neighbors and presumably replaces the
	# graph computed there; confirm whether the plain neighbors call is needed.
	bbknn.bbknn(adata, batch_key='sampleName', n_pcs=75, neighbors_within_batch=3, copy=False)

	## Calculate cell clusters via Louvain algorithm
	sc.tl.louvain(adata, resolution=louv_res)

	# PAGA is computed and drawn once without saving before the layouts that
	# are initialised with init_pos='paga' below.
	paga_style = dict(show=False, threshold=threshold, node_size_scale=node_size_scale, node_size_power=0.9, layout=paga_layout)
	sc.tl.paga(adata, groups='louvain')
	sc.pl.paga(adata, color='louvain', save=False, **paga_style)

	sc.tl.umap(adata, init_pos='paga', min_dist=umap_min_dist, maxiter=maxiter, spread=umap_spread, gamma=umap_gamma, random_state=random_state)
	#sc.tl.umap(adata, init_pos='spectral', min_dist=umap_min_dist, maxiter=maxiter, spread=umap_spread, gamma=umap_gamma, random_state=random_state)

	## Run tSNE algorithm
	sc.tl.tsne(adata, n_pcs=num_pcs_use)

	## Run draw_graph to get a FA2 graph layout
	sc.tl.draw_graph(adata, layout='fa', init_pos='paga', scalingRatio=4.0)

	# Keyword sets shared by the embedding plots below.
	cluster_style = dict(show=False, edges=False, edges_color='lightgrey', edges_width=0.01, size=dot_size, palette=greatestPalette, alpha=0.95, legend_fontsize=6)
	on_data = dict(color='louvain', save='_clusterIdentity_noEdge.png', legend_loc='on data', **cluster_style)
	with_age = dict(color=['louvain', 'age'], save='_clusterIdentity_age.png', legend_loc='right margin', **cluster_style)
	metadata_style = dict(show=False, legend_loc='right margin', edges=False, size=dot_size, palette=greatestPalette, alpha=0.95)

	sc.pl.umap(adata, **on_data)
	sc.pl.umap(adata, **with_age)
	sc.pl.umap(adata, color='age', save='_age.png', **metadata_style)
	sc.pl.umap(adata, color='sex', save='_sex.png', **metadata_style)
	sc.pl.umap(adata, color='sampleName', save='_sample.png', **metadata_style)
	sc.pl.umap(adata, color=['n_genes','n_counts','percent_mito'], save='_stats.png', show=False, edges=False, cmap=my_feature_cmap, size=dot_size+10)

	sc.pl.tsne(adata, **on_data)
	sc.pl.tsne(adata, **with_age)

	sc.pl.draw_graph(adata, **on_data)
	sc.pl.draw_graph(adata, **with_age)

	# NOTE(review): the '_page.png' suffix looks like a typo for '_paga.png';
	# left unchanged so existing output file names stay the same.
	sc.pl.paga(adata, color='louvain', save='_' + paga_layout + '_page.png', **paga_style)

	# A disabled alternative used to sit here as a string literal: a tSNE run
	# with use_rep='X_pca', perplexity=30, early_exaggeration=12,
	# learning_rate=1000, use_fast_tsne=True, n_jobs=10, followed by the same
	# six plots as the UMAP section. It never executed and is summarised here.

	## First marker pass: top 100 genes per cluster, unfiltered.
	sc.tl.rank_genes_groups(adata, 'louvain', method='wilcoxon', n_genes=100, use_raw=True)
	#sc.tl.filter_rank_genes_groups(adata, groupby='louvain', use_raw=True, log=True, key_added='rank_genes_groups_filtered', min_in_group_fraction=0.05, min_fold_change=1, max_out_group_fraction=0.95)
	marker_dot_style = dict(groupby='louvain', mean_only_expressed=True, save='_markerDotPlots.png', show=False, color_map=my_dot_cmap, dendrogram=True)
	sc.pl.rank_genes_groups_dotplot(adata, key='rank_genes_groups', n_genes=6, **marker_dot_style)
	mjc.write_marker_file(adata, file_out=figure_dir + '/marker_output.csv', n_genes=100)

	# Gene names available in adata.raw; every plotted list is filtered against it.
	expressed = {str(gene) for gene in adata.raw.var_names}

	feature_style = dict(show=False, cmap=my_feature_cmap, size=dot_size*3, use_raw=True)

	genes_to_plot = _expressed_genes(genes_of_interest, expressed)
	sc.pl.umap(adata, color=genes_to_plot, save='_featureplots.png', **feature_style)

	genes_to_plot = _expressed_genes(epi_cell_type_genes, expressed)
	sc.pl.umap(adata, color=genes_to_plot, save='_epi_cell_types_featureplots.png', **feature_style)

	genes_to_plot = _expressed_genes(emilys_list, expressed)
	sc.pl.umap(adata, color=genes_to_plot, save='_emilysGenes_featureplots.png', **feature_style)

	genes_to_plot = _expressed_genes(fig_genes, expressed)
	sc.pl.umap(adata, color=genes_to_plot, save='_fig2_featureplots.png', **feature_style)
	sc.pl.dotplot(adata, var_names=genes_to_plot, groupby='louvain', mean_only_expressed=True, save='_figure2_DotPlots.png', standard_scale='var', show=False, color_map=my_dot_cmap, dendrogram=False)
	#sc.pl.tsne(adata, color=genes_to_plot, save = '_featureplots.png', show = False, cmap = my_feature_cmap, size = dot_size*3, use_raw = True)

	# Figure 1D: genes grouped by cell type. The bracket positions are derived
	# from the genes that survive filtering, so a missing gene cannot shift a
	# label onto the wrong columns.
	fig_1D_groups = [
		('Fibroblasts', ['DCN','COL1A1','COL1A2']),
		('Vasc. SMCs', ['RGS5','PDGFRB']),
		('ICCs', ['ANO1','KIT']),
		('SMCs', ['ACTA2','TAGLN']),
		('Submucosal', ['PDGFRA','DLL1','F3','NPY','GPX3']),
	]
	fig_1D_genes = [gene for _, genes in fig_1D_groups for gene in genes]
	genes_to_plot = _expressed_genes(fig_1D_genes, expressed)
	group_positions, group_labels = _group_spans(fig_1D_groups, expressed)
	fig_1D_style = dict(color_map=my_feature_cmap, groupby='louvain', var_group_positions=group_positions, var_group_labels=group_labels, var_group_rotation=45, use_raw=True, log=True, dendrogram=True, expression_cutoff=expression_cutoff, mean_only_expressed=True, show=False)
	sc.pl.dotplot(adata, genes_to_plot, save='_fig_1D.png', **fig_1D_style)
	sc.pl.dotplot(adata, genes_to_plot, save='_fig_1D.pdf', **fig_1D_style)
	sc.pl.umap(adata, color=genes_to_plot, save='_fig1D_featureplots.png', **feature_style)

	genes_to_plot = _expressed_genes(y_chrom_genes, expressed)
	sc.pl.umap(adata, color=genes_to_plot, save='_y_chrom_featurePlots.png', **feature_style)
	#sc.pl.tsne(adata, color=genes_to_plot, save = '_y_chrom_featurePlots.png', show = False, cmap = my_feature_cmap, size = dot_size*3, use_raw = True)
	#sc.tl.dendrogram(adata, 'louvain', n_pcs=num_pcs_use, use_raw=True, cor_method='pearson', linkage_method='complete', key_added='dendrogram_louvain')

	## Second marker pass: top 50 genes per cluster, then filtered.
	# NOTE(review): this dot plot uses the same save suffix as the first marker
	# pass above, so it appears to overwrite that figure; confirm that is intended.
	sc.tl.rank_genes_groups(adata, 'louvain', method='wilcoxon', n_genes=50, use_raw=True)
	sc.tl.filter_rank_genes_groups(adata, groupby='louvain', use_raw=True, log=True, key_added='rank_genes_groups_filtered', min_in_group_fraction=0.25, min_fold_change=1.5, max_out_group_fraction=0.5)
	sc.pl.rank_genes_groups_dotplot(adata, key='rank_genes_groups_filtered', n_genes=10, **marker_dot_style)

	fig_2A_genes = ['ACTA2','TAGLN','DLL1','F3','NPY','GPX3']
	genes_to_plot = _expressed_genes(fig_2A_genes, expressed)
	sc.pl.dotplot(adata, var_names=genes_to_plot, groupby='age', use_raw=True, log=True, mean_only_expressed=True, expression_cutoff=1.0, save='_fig2A_DotPlot.png', standard_scale='var', smallest_dot=0, show=False, color_map=my_dot_cmap, dendrogram=False)
	sc.pl.matrixplot(adata, var_names=genes_to_plot, groupby='age', use_raw=True, log=False, save='_fig2A_MatrixPlot.png', show=False)
	sc.pl.dotplot(adata, var_names=genes_to_plot, groupby='louvain', mean_only_expressed=True, save='_fig2B_DotPlot.png', standard_scale='var', show=False, color_map=my_dot_cmap, dendrogram=True)
	sc.pl.umap(adata, color=genes_to_plot, save='_fig2B_featureplots.png', **feature_style)

	fig_3A_genes = ['ACTA2','TAGLN','F3','NPY','GPX3','WNT2B','RSPO2','RSPO3','NOG','CHRD','EGF']
	genes_to_plot = _expressed_genes(fig_3A_genes, expressed)
	sc.pl.dotplot(adata, var_names=genes_to_plot, groupby='louvain', use_raw=True, log=False, mean_only_expressed=True, save='_fig3A_DotPlot.png', standard_scale='var', show=False, color_map=my_dot_cmap, dendrogram=True, dot_max=0.5)
	sc.pl.umap(adata, color=genes_to_plot, save='_fig3A_featureplots.png', **feature_style)

	# NOTE(review): 'CRY61' and 'CTCSC' look like misspelled gene symbols
	# (possibly CYR61 and CTSC); as written they will just be reported as not
	# expressed. Left unchanged pending confirmation.
	crypt_SEC_genes = ['F3','DLL1','COL15A1','NRG1','CH25H','MMP11','CXCR4','CPM','BMP3','IGFBP5','ADAMDEC1','CTGF','CRY61','IGFBP3','HHIP','EFEMP1','NPY','CTCSC','NBEAL1','EIF5A','RPSAP58']
	genes_to_plot = _expressed_genes(crypt_SEC_genes, expressed)
	sc.pl.dotplot(adata, var_names=genes_to_plot, groupby='louvain', use_raw=True, log=False, mean_only_expressed=True, save='_crypt_SEC_DotPlot.png', standard_scale='var', show=False, color_map=my_dot_cmap, dendrogram=True)
	sc.pl.umap(adata, color=genes_to_plot, save='_crypt_SEC_featureplots.png', **feature_style)

	# Figure 3B: the full panel in log and linear scale, then five sub-panels
	# (log scale only), all with the same dot plot settings.
	fig_3B_style = dict(groupby='louvain', mean_only_expressed=True, standard_scale='var', show=False, color_map=my_dot_cmap, dendrogram=True, dot_max=0.25)

	fig_3B_genes = ['LGR5','OLFM4','FABP2','SI','DPP4','F3','NPY','ACTA2','TAGLN','NRG1','NRG2','NRG3','NRG4','TGFA','HBEGF','AREG','BTC','EPGN','EREG','EGFR','ERBB2','ERBB3','ERBB4']
	genes_to_plot = _expressed_genes(fig_3B_genes, expressed)
	sc.pl.dotplot(adata, var_names=genes_to_plot, save='_fig3B_DotPlot_logScale.png', log=True, **fig_3B_style)
	sc.pl.dotplot(adata, var_names=genes_to_plot, save='_fig3B_DotPlot_linearScale.png', log=False, **fig_3B_style)

	fig_3B_panels = [
		('_fig3B1_DotPlot_logScale.png', ['LGR5','OLFM4']),
		('_fig3B2_DotPlot_logScale.png', ['FABP2','SI','DPP4']),
		('_fig3B3_DotPlot_logScale.png', ['F3','NPY','ACTA2','TAGLN']),
		('_fig3B4_DotPlot_logScale.png', ['NRG1','NRG2','NRG3','NRG4','TGFA','HBEGF','AREG','BTC','EPGN','EREG']),
		('_fig3B5_DotPlot_logScale.png', ['EGFR','ERBB2','ERBB3','ERBB4']),
	]
	for panel_file, panel_genes in fig_3B_panels:
		genes_to_plot = _expressed_genes(panel_genes, expressed)
		sc.pl.dotplot(adata, var_names=genes_to_plot, save=panel_file, log=True, **fig_3B_style)

	return adata