def cell_type_by_cluster(rdata, cell_assign_fit, tenx_analysis, prefix):
    """Plot the distribution of assigned cell types within each cluster.

    Loads the CellAssign fit (barcode -> cell type) and the cluster labels
    from the 10x analysis, computes per-cluster cell-type percentages, and
    saves a grouped bar plot to ``figures/cell_type_by_cluster.png``.

    Args:
        rdata: Path to an RData file holding the SingleCellExperiment.
        cell_assign_fit: Path to a pickled CellAssign fit containing
            "Barcode" and "cell_type" entries.
        tenx_analysis: Value accepted by ``TenxAnalysis(...)`` — presumably
            a path to the 10x analysis directory (TODO confirm at caller).
        prefix: Label appended to the plot title.
    """
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    # BUG FIX: close the pickle handle (original leaked an open file).
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)
    cell_types = dict(zip(fit["Barcode"], fit["cell_type"]))
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce)
    clusters = dict(zip(sce.colData["Barcode"], cluster_labels))

    # Barcode-aligned (cluster, cell type) pairs; barcodes with no cluster
    # assignment are skipped. The original also filled two defaultdict(list)
    # maps here, but by assignment rather than append, and never read them —
    # that dead code is removed.
    cluster_list = []
    celltype_list = []
    for barcode, cell in cell_types.items():
        # BUG FIX: only KeyError (barcode absent from clusters) is expected;
        # the original caught and discarded every exception.
        try:
            label = str(clusters[barcode])
        except KeyError:
            continue
        cluster_list.append(label)
        celltype_list.append(cell)

    # Per-cluster counts of each cell type.
    counts = collections.defaultdict(lambda: collections.defaultdict(int))
    for label, ctype in zip(cluster_list, celltype_list):
        counts[label][ctype] += 1

    # BUG FIX: iterate the UNIQUE cell types. The original iterated the full
    # per-barcode list, emitting one duplicate DataFrame row per barcode.
    unique_celltypes = sorted(set(celltype_list))
    fclusters = []
    fcelltypes = []
    fpercentages = []
    for label, ctype_counts in counts.items():
        total = float(sum(ctype_counts.values()))
        for cell in unique_celltypes:
            fclusters.append(label)
            fcelltypes.append(cell)
            fpercentages.append(float(ctype_counts.get(cell, 0)) / total)

    df = pandas.DataFrame({
        "Cluster": fclusters,
        "Cell Type": fcelltypes,
        "Percentage": fpercentages,
    })
    fig, ax = plt.subplots(figsize=(16, 8))
    ax = sns.barplot(x="Cluster", y="Percentage", hue="Cell Type",
                     data=df, palette="tab10")
    ax.set_title("Cell Type by Cluster - {}".format(prefix))
    plt.tight_layout()
    plt.savefig("figures/cell_type_by_cluster.png")
    # BUG FIX: release the figure so repeated calls do not accumulate open
    # matplotlib figures.
    plt.close(fig)
def Run(sampleid, before, finished):
    """Download a 10x sample, compute (or load cached) clusters, and plot.

    Produces TSNE and UMAP plots colored by cluster under a per-sample
    cache directory, then writes the *finished* sentinel file.

    Args:
        sampleid: Sample identifier used for storage lookup and cache paths.
        before: Upstream pipeline sentinel — unused here, kept so the task
            signature matches the workflow wiring.
        finished: Path of the sentinel file written on completion.
    """
    clustering = ".cache/{}/clustering/".format(sampleid)
    # exist_ok avoids the check-then-create race of the original
    # os.path.exists() + os.makedirs() pair.
    os.makedirs(clustering, exist_ok=True)
    cluster_results = os.path.join(clustering, "{}_clusters.pkl".format(sampleid))
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    analysis_path = tenx.tenx_path
    tenx_analysis = TenxAnalysis(analysis_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    if not os.path.exists(cluster_results):
        clusters = tenx_analysis.clusters(qc.sce)
        # BUG FIX: context managers guarantee the pickle handles are closed
        # (the original leaked both open() calls).
        with open(cluster_results, "wb") as handle:
            pickle.dump(clusters, handle)
    else:
        with open(cluster_results, "rb") as handle:
            clusters = pickle.load(handle)
    tsne_by_cluster(qc.sce, clusters, sampleid, clustering)
    umap_by_cluster(qc.sce, clusters, sampleid, clustering)
    # BUG FIX: close the sentinel file so the write is flushed before the
    # workflow engine checks for it.
    with open(finished, "w") as handle:
        handle.write("Completed")