def plot_by_genes(rdata, tenx_analysis, genes, prefix, rep, pcs):
    """Save one expression-coloured scatter plot per gene.

    Cells are placed at the first two rows returned by
    ``sce.getReducedDims(rep)`` and coloured by the value read from the
    scanpy AnnData built from ``tenx_analysis``.  Each figure is written to
    ``figures/expression/expression_<gene>.png`` and closed afterwards so
    that long gene lists do not accumulate open matplotlib figures.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx_analysis: Argument handed to ``TenxAnalysis``.
        genes: Iterable of gene identifiers used to index the AnnData.
        prefix: Unused; kept for interface compatibility.
        rep: Reduced-dimension name passed to ``getReducedDims``.
        pcs: Unused; kept for interface compatibility.
    """
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    sce = SingleCellExperiment.fromRData(rdata)
    tsne_dims = sce.getReducedDims(rep)
    barcodes = sce.colData["Barcode"]
    transcripts = sce.rowData["Symbol"]
    adata = tenx.create_scanpy_adata(barcodes=barcodes, transcripts=transcripts)
    # Going through a dict keeps the original semantics: if a barcode occurs
    # more than once, the last coordinate seen for it wins.
    x_coded = dict(zip(barcodes, tsne_dims[0]))
    y_coded = dict(zip(barcodes, tsne_dims[1]))
    os.makedirs("figures/expression", exist_ok=True)
    x = [x_coded[barcode] for barcode in barcodes]
    y = [y_coded[barcode] for barcode in barcodes]
    for gene in genes:
        expression = [float(adata[barcode, gene].X) for barcode in barcodes]
        fig, ax = plt.subplots(figsize=(10, 8))
        try:
            sns.scatterplot(x=x, y=y, hue=expression, alpha=0.85, ax=ax)
            ax.set_title("{} Counts".format(gene))
            ax.legend()
            plt.tight_layout()
            plt.savefig("figures/expression/expression_{}.png".format(gene))
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)
def umap_by_gene(rdata, gene, prefix, pcs):
    """Plot the UMAP embedding coloured by the expression of ``gene``.

    The figure is written to ``figures/umap_by_<gene>.png``.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        gene: Gene identifier used to slice the AnnData columns.
        prefix: Text appended to the plot title.
        pcs: Unused; kept for interface compatibility.
    """
    # NOTE(review): ``tenx_analysis`` is not a parameter of this function, so
    # it is resolved from the enclosing module scope. Confirm it is defined
    # there, or thread it through the callers.
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    sce = SingleCellExperiment.fromRData(rdata)
    tsne_dims = sce.reducedDims["UMAP"]
    barcodes = sce.colData["Barcode"]
    transcripts = sce.rowData["Symbol"]
    # Fixed: the original passed the undefined name ``symbols`` here while the
    # ``transcripts`` local computed just above was never used.
    adata = tenx.create_scanpy_adata(barcodes=barcodes, transcripts=transcripts)
    assert len(barcodes) == len(adata[:, gene])
    expression = dict(zip(barcodes, adata[:, gene]))
    tsne_dims = numpy.array(tsne_dims).reshape(2, len(barcodes))
    x_coded = dict(zip(barcodes, tsne_dims[0]))
    y_coded = dict(zip(barcodes, tsne_dims[1]))
    clusters = [float(expression[barcode]) for barcode in barcodes]
    x = [x_coded[barcode] for barcode in barcodes]
    y = [y_coded[barcode] for barcode in barcodes]
    os.makedirs("figures", exist_ok=True)
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85, ax=ax)
        ax.set_title("PCA - Clusters - {}".format(prefix))
        ax.legend()
        plt.tight_layout()
        plt.savefig("figures/umap_by_{}.png".format(gene))
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)
def upload_tenx(sampleid, before, finished):
    """Finalize the Cell Ranger output of ``sampleid`` and upload it.

    Args:
        sampleid: Sample identifier; the output is read from
            ``./<sampleid>/outs/``.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file that receives ``Completed`` once
            the upload call has returned.
    """
    print("Calling upload.")
    tenx = TenxAnalysis("./{}/outs/".format(sampleid))
    tenx.finalize()
    tenxds = TenxDataStorage(sampleid)
    tenxds.upload_cellranger(tenx)
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
def Run(sampleid, finished):
    """Run Cell Ranger count if needed, then finalize and upload the output.

    ``CellRanger.count`` is skipped when a ``cellranger.complete`` file
    exists in the working directory.

    Args:
        sampleid: Sample identifier; the output is read from
            ``./<sampleid>/outs/``.
        finished: Path of the marker file that receives ``Completed``.
    """
    if not os.path.exists("cellranger.complete"):
        CellRanger.count([sampleid])
    tenx = TenxAnalysis("./{}/outs/".format(sampleid))
    tenx.finalize()
    tenxds = TenxDataStorage(sampleid)
    tenxds.upload_cellranger(tenx)
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
def test_tenx_full_analysis(self):
    """Read the counts found under ``tests`` and run the analysis on them."""
    counts_source = TenxAnalysis("tests")
    print("Reading Counts")
    sce = TenX.read10xCounts(counts_source)
    # Alternative input kept from the original (disabled):
    #   rdata = os.path.join(base_dir, "tests/example_sce.RData")
    #   sce = SingleCellExperiment.fromRData(rdata)
    runner = TenX()
    print("Generating Scater Analysis")
    scater_analysis = runner.analysis(sce)
def RunUpload(sampleid, finished, species):
    """Finalize the Cell Ranger output under the job path and upload it.

    Args:
        sampleid: Sample identifier; the output is read from
            ``<config.jobpath>/<sampleid>/outs/``.
        finished: Path of the marker file that receives ``Completed``.
        species: Forwarded to ``TenxDataStorage`` as ``species``.
    """
    print("Uploading ",species, sampleid)
    tenx_output = os.path.join(config.jobpath,"{}/outs/".format(sampleid))
    tenx = TenxAnalysis(tenx_output)
    tenx.finalize()
    tenxds = TenxDataStorage(sampleid, species=species)
    print("Running upload")
    tenxds.upload_cellranger(tenx)
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
def cluster_markers(rdata, tenx_analysis, rep, pcs, embedding_file, prefix):
    """Plot the ranked marker genes of every cluster.

    Markers are always computed with ``rep="PCA"``; the ``rep`` argument is
    only forwarded to ``plot_by_markers`` together with ``embedding_file``.
    Each cluster is plotted under the prefix ``"Cluster <i> <prefix>"``.
    """
    analysis = TenxAnalysis(tenx_analysis)
    analysis.load()
    experiment = SingleCellExperiment.fromRData(rdata)
    ranked = analysis.markers_by_clusters(experiment, rep="PCA", pcs=pcs)
    gene_names = ranked["rank_genes_groups"]["names"]
    # Transpose so that each entry holds the genes of one cluster.
    per_cluster = list(zip(*gene_names))
    for cluster_index, cluster_genes in enumerate(per_cluster):
        label = "Cluster {} {}".format(cluster_index, prefix)
        plot_by_markers(rdata, tenx_analysis, cluster_genes, label, rep, pcs,
                        embedding_file)
def scvis_by_cluster_markers(rdata, tenx_analysis, prefix, pcs, embedding_file):
    """Compute cluster markers on the SCVIS embedding (best effort).

    Any failure is logged with its traceback and then suppressed, so the
    function always returns ``None`` and never raises; the original
    swallowed failures without leaving a trace.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx_analysis: Argument handed to ``TenxAnalysis``.
        prefix: Unused; kept for interface compatibility.
        pcs: Forwarded to ``markers_by_clusters``.
        embedding_file: Forwarded to ``markers_by_clusters``.
    """
    import logging

    try:
        tenx = TenxAnalysis(tenx_analysis)
        tenx.load()
        sce = SingleCellExperiment.fromRData(rdata)
        tenx.markers_by_clusters(sce,
                                 rep="SCVIS",
                                 pcs=pcs,
                                 embedding_file=embedding_file)
    except Exception:
        # Deliberately best effort, but leave a record of what went wrong.
        logging.getLogger(__name__).exception(
            "SCVIS cluster marker computation failed")
        return
def Run(sampleid, before, finished):
    """Batch-correct the samples found for ``sampleid`` with Scanorama.

    The first corrected dataset is written as 10x output into
    ``.cache/corrected``.

    Args:
        sampleid: Identifier handed to ``Search``.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file that receives ``Completed``.
    """
    adatas = Search(sampleid)
    print ("Correcting on {} samples.".format(len(adatas)))
    sys.stdout.flush()
    corrected = Scanorama.correct(adatas)
    sys.stdout.flush()
    # exist_ok avoids the check-then-create race of the original.
    os.makedirs(".cache/corrected", exist_ok=True)
    TenxAnalysis.make_10x_output(corrected[0],".cache/corrected")
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
def main():
    """Download sample ``patient2`` (v2) and run the Kallisto DE step on it.

    The FASTQ directory and output location are hard-coded paths.
    """
    sample = "patient2"
    storage = TenxDataStorage(sample, version="v2")
    storage.download()

    analysis = TenxAnalysis(storage.tenx_path)
    analysis.load()

    output = "/igo_large/scratch/test_kallisto"
    fastqs = FastQDirectory(
        "/igo_large/scratch/allen/bams/xfastqs2/McGilvery_Sonya__TLH_MissingLibrary_1_CB8R9ANXX/",
        sample,
        output,
    )
    Kallisto(fastqs, analysis).de()
def cell_type_by_cluster(rdata, cell_assign_fit, tenx_analysis, prefix):
    """Plot, per cluster, the fraction of cells assigned to each cell type.

    The bar plot is written to ``figures/cell_type_by_cluster.png``.
    Barcodes present in the fit but without a cluster label are skipped.

    Unlike the original, one row is emitted per (cluster, distinct cell
    type) instead of one row per (cluster, barcode). The duplicated rows
    all carried the same percentage, so the bar heights are unchanged while
    the data frame no longer grows with the number of cells.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        cell_assign_fit: Path of a pickle holding ``Barcode`` and
            ``cell_type`` sequences.
        tenx_analysis: Argument handed to ``TenxAnalysis``.
        prefix: Text appended to the plot title.
    """
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    # NOTE: unpickling executes arbitrary code; only load trusted fit files.
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)
    cell_types = dict(zip(fit["Barcode"], fit["cell_type"]))
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce)
    clusters = dict(zip(sce.colData["Barcode"], cluster_labels))

    counts = collections.defaultdict(lambda: collections.defaultdict(int))
    seen_types = set()
    ordered_types = []  # distinct cell types in first-seen order
    for barcode, cell in cell_types.items():
        try:
            label = str(clusters[barcode])
        except KeyError:
            # Barcode was assigned a cell type but has no cluster label.
            continue
        counts[label][cell] += 1
        if cell not in seen_types:
            seen_types.add(cell)
            ordered_types.append(cell)

    fclusters = []
    fcelltypes = []
    fpercentages = []
    for label, type_counts in counts.items():
        total = float(sum(type_counts.values()))
        for cell in ordered_types:
            fclusters.append(label)
            fcelltypes.append(cell)
            # .get() avoids inserting missing keys into the defaultdict.
            fpercentages.append(float(type_counts.get(cell, 0)) / total)
    df = pandas.DataFrame({
        "Cluster": fclusters,
        "Cell Type": fcelltypes,
        "Percentage": fpercentages
    })
    os.makedirs("figures", exist_ok=True)
    fig, ax = plt.subplots(figsize=(16, 8))
    try:
        ax = sns.barplot(x="Cluster",
                         y="Percentage",
                         hue="Cell Type",
                         data=df,
                         palette="tab10")
        ax.set_title("Cell Type by Cluster - {}".format(prefix))
        plt.tight_layout()
        plt.savefig("figures/cell_type_by_cluster.png")
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)
def test_symbol_retrieve(self):
    """Print the row-data keys of SCEs built from four different sources."""
    # 1) Counts read through the TenX wrapper.
    experiment = TenX.read10xCounts(TenxAnalysis("tests/pre_igo"))
    print(experiment.rowData.keys())

    # 2) A serialized SCE.
    sce_path = os.path.join(base_dir, "tests/example_sce.rda")
    experiment = SingleCellExperiment.fromRData(sce_path)
    print(experiment.rowData.keys())

    # 3) Counts read through DropletUtils and converted from the RS4 result.
    droplet_utils = DropletUtils()
    raw_result = droplet_utils.read10xCounts("tests/hg19/")
    experiment = SingleCellExperiment.fromRS4(raw_result)
    print(experiment.rowData.keys())

    # 4) The copy-number example, including its row and column names.
    copy_number_path = os.path.join(base_dir, "tests/example_copy_number.rda")
    experiment = SingleCellExperiment.fromRData(copy_number_path)
    print(experiment.rowData.keys())
    print(experiment.rownames)
    print(experiment.colnames)
def get_tenx(samples):
    """Download every sample and return one ``TenxAnalysis`` per sample.

    The result preserves the order of ``samples``; an empty input yields an
    empty list.
    """

    def fetch(sample_name):
        # Download first so that ``tenx_path`` refers to local data.
        storage = TenxDataStorage(sample_name)
        storage.download()
        return TenxAnalysis(storage.tenx_path)

    analyses = []
    for sample_name in samples:
        analyses.append(fetch(sample_name))
    return analyses
def count(fastq_object):
    """Run ``cellranger count`` for ``fastq_object`` and wrap its output.

    The command is built by ``CellRanger.cmd`` from the id, FASTQ path,
    first sample id, configured reference and first lane. The exit status
    of the subprocess is not inspected.

    Returns:
        ``TenxAnalysis`` constructed from ``fastq_object.out()``.
    """
    options = {
        "id": fastq_object.id,
        "fastqs": fastq_object.path,
        "sample": fastq_object.samples.sampleid[0],
        "transcriptome": config.reference,
        "lanes": fastq_object.samples.lane[0],
        # "chemistry": "SC3P_auto" was disabled in the original.
    }
    command = CellRanger.cmd("count", options)
    subprocess.call(command)
    return TenxAnalysis(fastq_object.out())
def test_cell_assign_pkl(self):
    """Run CellAssign EM on test counts with the pickled rho matrix.

    The counts come from ``tests/pre_igo`` and the marker matrix from
    ``tests/rho_up.pkl``.
    """
    import pickle

    tenx = TenxAnalysis("tests/pre_igo")
    sce = TenX.read10xCounts(tenx)
    # Context manager closes the file even if unpickling fails.
    with open("tests/rho_up.pkl", "rb") as handle:
        rho_matrix = pickle.load(handle)
    rho = GeneMarkerMatrix(rho_matrix)
    cellassigner = CellAssign()
    res = cellassigner.run_em(sce, rho)
def Analysis(sampleid, before, finished):
    """Produce the cell-type plots for ``sampleid`` from its CellAssign fit.

    The fit is read from ``.cache/<sampleid>/cell_types.pkl`` and the plots
    go to ``.cache/<sampleid>/cellassignanalysis/``.

    Args:
        sampleid: Sample identifier used for download and cache paths.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file that receives ``Completed``.

    Raises:
        AssertionError: If the fit pickle does not exist.
    """
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    analysis_path = tenx.tenx_path
    tenx_analysis = TenxAnalysis(analysis_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    cellassign_analysis = ".cache/{}/cellassignanalysis/".format(sampleid)
    # exist_ok avoids the check-then-create race of the original.
    os.makedirs(cellassign_analysis, exist_ok=True)
    pyfit_path = ".cache/{}/cell_types.pkl".format(sampleid)
    assert os.path.exists(pyfit_path), "No Pyfit Found."
    # NOTE: unpickling executes arbitrary code; only load trusted fit files.
    with open(pyfit_path, "rb") as handle:
        pyfit = pickle.load(handle)
    marker_list = GeneMarkerMatrix.read_yaml(config.rho_matrix)
    cell_types = marker_list.celltypes()
    if "B cell" not in cell_types:
        cell_types.append("B cell")
    celltypes(pyfit, sampleid, cellassign_analysis, known_types=cell_types)
    tsne_by_cell_type(qc.sce,
                      pyfit,
                      sampleid,
                      cellassign_analysis,
                      known_types=cell_types)
    umap_by_cell_type(qc.sce,
                      pyfit,
                      sampleid,
                      cellassign_analysis,
                      known_types=cell_types)
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
def Run(sampleid, species, umi_plot, mito_plot, ribo_plot, counts_plot, raw_sce):
    """Run QC for ``sampleid`` and copy its plots and SCE to the given paths.

    ``species`` is accepted but not used. The ``results`` directory under
    ``config.jobpath`` is created when missing.
    """
    print("Running QC.")
    storage = TenxDataStorage(sampleid)
    analysis = TenxAnalysis(storage.tenx_path)
    analysis.load()
    analysis.extract()
    qc = QualityControl(analysis, sampleid)
    qc.run(mito=config.mito)

    plot_dir = qc.plots
    plot_names = ("umi.png", "mito.png", "ribo.png", "counts.png",
                  "total_counts_v_features.png")
    # ``cvf`` is computed but not copied, exactly as in the original.
    umi, mito, ribo, counts, cvf = [
        os.path.join(plot_dir, plot_name) for plot_name in plot_names
    ]

    results_dir = os.path.join(config.jobpath, "results")
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    plot_copies = (
        (umi, umi_plot),
        (mito, mito_plot),
        (ribo, ribo_plot),
        (counts, counts_plot),
    )
    for source, destination in plot_copies:
        shutil.copyfile(source, destination)
    shutil.copyfile(qc.sce, raw_sce)
def Run(sampleid, before, finished):
    """Collect the QC and CellAssign artefacts of a sample into a report.

    The analysis, the QC'd SCE and the plots are registered on a
    ``Results`` object, which is then passed to ``exportMD`` and
    ``exportUpload``.

    Args:
        sampleid: Sample identifier used for download and QC.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file that receives ``Completed``.
    """
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis,sampleid)
    plots = qc.plots
    # The CellAssign plots live next to the QC plot directory.
    cellassign = os.path.join(os.path.split(plots)[0],"cellassignanalysis")
    results = Results(config.jobpath)
    results.add_analysis(tenx.tenx_path)
    results.add_sce(qc.qcdsce)
    umi = os.path.join(plots,"umi.png")
    mito = os.path.join(plots,"mito.png")
    ribo = os.path.join(plots, "ribo.png")
    total_counts = os.path.join(plots, "total_counts.png")
    tfbc = os.path.join(plots, "total_features_by_counts.png")
    tcvfc = os.path.join(plots, "total_counts_v_features_by_counts.png")
    celltypes = os.path.join(cellassign, "cell_types.png")
    results.add_plot(umi,"UMI Distribution")
    results.add_plot(mito,"Mito Distribution")
    results.add_plot(ribo,"Ribo Distribution")
    results.add_plot(total_counts,"Total Counts Distribution")
    results.add_plot(tcvfc,"Total Counts")
    # Fixed: the original registered ``tcvfc`` a second time under this
    # header and never used ``tfbc``.
    results.add_plot(tfbc,"Total Features by Counts")
    results.add_plot(celltypes,"Cell Types")
    exportMD(results)
    exportUpload(results)
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
def RunExtract(sample_to_path, rdata_path):
    """Run QC for the sample described in a JSON file and copy its SCE.

    Args:
        sample_to_path: Path of a JSON object mapping sample ids to analysis
            paths; the last item of the mapping is used.
        rdata_path: Destination the QC SCE file is copied to.

    Raises:
        IndexError: If the JSON mapping is empty.
    """
    # Context manager closes the JSON file, which the original leaked.
    with open(sample_to_path, "r") as handle:
        sample = json.load(handle)
    sampleid, path = list(sample.items()).pop()
    tenx_analysis = TenxAnalysis(path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    # Reuse an SCE left over from a previous run instead of recomputing it.
    if not os.path.exists(qc.sce):
        qc.run(mito=config.mito)
    shutil.copyfile(qc.sce, rdata_path)
def Run(sampleid, before, finished):
    """Run CellAssign on the QC SCE of ``sampleid``.

    The result path handed to ``CellAssign.run`` is
    ``.cache/<sampleid>/celltypes.rdata``.

    Args:
        sampleid: Sample identifier used for download, QC and cache paths.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file that receives ``Completed``.
    """
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    analysis_path = tenx.tenx_path
    tenx_analysis = TenxAnalysis(analysis_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    CellAssign.run(qc.sce, config.rho_matrix,
                   ".cache/{}/celltypes.rdata".format(sampleid))
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
def __init__(self, sampleids, chem="v2", output="./"):
    """Download, load and extract the analysis of every sample id.

    The prepared ``TenxAnalysis`` objects are appended to ``self.tenxs`` in
    the order of ``sampleids``; ``chem`` is forwarded to ``TenxDataStorage``
    as ``version``.
    """

    def prepare(sample_name):
        storage = TenxDataStorage(sample_name, version=chem)
        storage.download()
        analysis = TenxAnalysis(storage.tenx_path)
        analysis.load()
        analysis.extract()
        return analysis

    self.output = output
    self.samples = sampleids
    self.tenxs = []
    for sample_name in self.samples:
        self.tenxs.append(prepare(sample_name))
def Run(sampleid, before, finished):
    """Run QC for ``sampleid`` and upload the raw and processed results.

    Args:
        sampleid: Sample identifier used for download and QC.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file that receives ``Completed``.
    """
    print("Running QC.")
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    print("Extracted.")
    qc = QualityControl(tenx_analysis, sampleid)
    qc.run(mito=config.mito)
    print("Uploading")
    qc.upload_raw()
    qc.upload()
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
def Run(sampleid, before, finished):
    """Cluster the cells of ``sampleid`` and plot them on t-SNE and UMAP.

    Cluster labels are cached in
    ``.cache/<sampleid>/clustering/<sampleid>_clusters.pkl`` and reused on
    later runs.

    Args:
        sampleid: Sample identifier used for download, QC and cache paths.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file that receives ``Completed``.
    """
    clustering = ".cache/{}/clustering/".format(sampleid)
    # exist_ok avoids the check-then-create race of the original.
    os.makedirs(clustering, exist_ok=True)
    cluster_results = os.path.join(clustering,
                                   "{}_clusters.pkl".format(sampleid))
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    analysis_path = tenx.tenx_path
    tenx_analysis = TenxAnalysis(analysis_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    if not os.path.exists(cluster_results):
        clusters = tenx_analysis.clusters(qc.sce)
        # Closing the handle makes sure the cache is completely written.
        with open(cluster_results, "wb") as handle:
            pickle.dump(clusters, handle)
    else:
        with open(cluster_results, "rb") as handle:
            clusters = pickle.load(handle)
    tsne_by_cluster(qc.sce, clusters, sampleid, clustering)
    umap_by_cluster(qc.sce, clusters, sampleid, clustering)
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
def Run(sampleid, before, finished, use_corrected=False):
    """Run CellAssign on the corrected or the per-sample QC SCE.

    The corrected SCE under ``.cache/corrected/`` is used when
    ``use_corrected`` is true and that directory exists; it is built with
    ``DropletUtils.read10xCounts`` if missing. CellAssign is skipped when
    ``.cache/<sampleid>/celltypes.rdata`` already exists.

    Args:
        sampleid: Sample identifier used for download, QC and cache paths.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file that receives ``Completed``.
        use_corrected: Prefer the batch-corrected data when available.
    """
    if use_corrected and os.path.exists(".cache/corrected/"):
        sce = ".cache/corrected/corrected_sce.rdata"
        if not os.path.exists(sce):
            utils = DropletUtils()
            utils.read10xCounts(".cache/corrected/", sce)
    else:
        tenx = TenxDataStorage(sampleid, version="v3")
        tenx.download()
        analysis_path = tenx.tenx_path
        tenx_analysis = TenxAnalysis(analysis_path)
        tenx_analysis.load()
        tenx_analysis.extract()
        qc = QualityControl(tenx_analysis, sampleid)
        sce = qc.sce
    celltypes_rdata = ".cache/{}/celltypes.rdata".format(sampleid)
    if not os.path.exists(celltypes_rdata):
        CellAssign.run(sce, config.rho_matrix, celltypes_rdata)
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
def Analysis(sampleid, before, finished, use_corrected=False):
    """Produce the cell-type plots from the CellAssign fit of ``sampleid``.

    The embeddings are read from the corrected SCE when ``use_corrected``
    is true and ``.cache/corrected`` exists. Otherwise they come from
    ``sce_cas.rdata`` in the directory of the sample's QC SCE.

    Args:
        sampleid: Sample identifier used for download, QC and cache paths.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file that receives ``Completed``.
        use_corrected: Prefer the batch-corrected data when available.

    Raises:
        AssertionError: If ``.cache/<sampleid>/cell_types.pkl`` is missing.
    """
    if use_corrected and os.path.exists(".cache/corrected"):
        sce = ".cache/corrected/corrected_sce.rdata"
        if not os.path.exists(sce):
            utils = DropletUtils()
            utils.read10xCounts(".cache/corrected/", sce)
        filtered_sce = sce
    else:
        tenx = TenxDataStorage(sampleid, version="v3")
        tenx.download()
        analysis_path = tenx.tenx_path
        tenx_analysis = TenxAnalysis(analysis_path)
        tenx_analysis.load()
        tenx_analysis.extract()
        qc = QualityControl(tenx_analysis, sampleid)
        filtered_sce = os.path.join(os.path.split(qc.sce)[0], "sce_cas.rdata")
    cellassign_analysis = ".cache/{}/cellassignanalysis/".format(sampleid)
    # exist_ok avoids the check-then-create race of the original.
    os.makedirs(cellassign_analysis, exist_ok=True)
    pyfit_path = ".cache/{}/cell_types.pkl".format(sampleid)
    assert os.path.exists(pyfit_path), "No Pyfit Found."
    # NOTE: unpickling executes arbitrary code; only load trusted fit files.
    with open(pyfit_path, "rb") as handle:
        pyfit = pickle.load(handle)
    marker_list = GeneMarkerMatrix.read_yaml(config.rho_matrix)
    cell_types = marker_list.celltypes()
    if "B cell" not in cell_types:
        cell_types.append("B cell")
    celltypes(pyfit, sampleid, cellassign_analysis, known_types=cell_types)
    tsne_by_cell_type(filtered_sce,
                      pyfit,
                      sampleid,
                      cellassign_analysis,
                      known_types=cell_types)
    umap_by_cell_type(filtered_sce,
                      pyfit,
                      sampleid,
                      cellassign_analysis,
                      known_types=cell_types)
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
def create_workflow():
    """Assemble and return the pypeliner workflow for one pipeline run.

    The function wires up, in order: optional library aggregation, the
    primary run (BCL / FASTQ), QC through ``SecondaryAnalysis``, and the
    optional differential expression, CellAssign, SCVis, CloneAlign and
    clustering stages selected by flags on ``config``. Every artefact is
    registered on a ``Results`` object that the reporting transforms
    receive.

    ``args`` and ``config`` are not defined in this function; they are read
    from the enclosing scope.

    Returns:
        The workflow obtained from ``secondary_analysis.get_workflow()``.
    """
    workflow = pypeliner.workflow.Workflow()
    bcl_directory = args.get("bcl", None)
    fastq_directories = args.get("fastqs")
    aggregate = args.get("aggregate_mlibs", list())
    agg_type = args.get("agg_method", "scanorama")
    libbase = args.get("lib_base", None)
    # ``additional`` and ``recipe`` are read here but not used below.
    additional = args.get("additional", [])
    prefix = config.prefix
    output = config.jobpath
    recipe = args.get("recipe", "basic")
    # Best effort: any failure to create the folder is ignored.
    try:
        cellranger_folder = os.path.join(output, prefix)
        os.makedirs(cellranger_folder)
    except Exception as e:
        pass
    if fastq_directories == None:
        fastq_directories = []
    results = Results(output)
    runner = PrimaryRun(workflow, prefix, output)
    """ Aggregating Libraries """
    if aggregate != None and len(aggregate) > 0:
        if agg_type == "tenx":
            runner.aggregate_libraries_tenx(aggregate, libbase)
            # Point the rest of the pipeline at the aggregated output.
            args["tenx"] = os.path.join(output, "run_{}/outs".format(prefix))
        if agg_type == "scanorama":
            runner.aggregate_libraries_scanorama()
    """ Setup """
    tenx_analysis = args.get("tenx", None)
    bcls = runner.set_bcl(bcl_directory)
    fastqs = runner.set_fastq(fastq_directories)
    workflow = runner.get_workflow()
    # Same lookup as a few lines above; ``args`` is not modified in between.
    tenx_analysis = args.get("tenx", None)
    # When FASTQs are processed, the analysis path is the job's own output.
    if fastqs != []:
        tenx_analysis = os.path.join(config.jobpath, prefix, "outs")
    rdata = args.get("rdata", None)
    secondary_analysis = SecondaryAnalysis(workflow, prefix, output)
    tenx = TenxAnalysis(tenx_analysis)
    """ QC """
    secondary_analysis.run_scater()
    secondary_analysis.build_sce(tenx)
    secondary_analysis.set_rdata(rdata)
    results.add_analysis(tenx_analysis)
    results.add_workflow(secondary_analysis.rscript)
    results.add_sce(secondary_analysis.sce)
    umi = os.path.join(output, "figures/umi_distribution.png")
    mito = os.path.join(output, "figures/mito_distribution.png")
    ribo = os.path.join(output, "figures/ribo_distribution.png")
    freq = os.path.join(output, "figures/highestExprs.png")
    tech = os.path.join(output, "figures/mean_variance_trend.png")
    high_var = os.path.join(output, "figures/highly_variable_genes.png")
    results.add_plot(umi, "UMI Distribution")
    results.add_plot(mito, "Mito Distribution")
    results.add_plot(ribo, "Ribo Distribution")
    results.add_plot(freq, "Highest Frequency")
    results.add_plot(tech, "Mean Variance Trend")
    results.add_plot(high_var, "Highly Variable Genes")
    results.add_cellassign_pkl(secondary_analysis.cell_assign_fit)
    results.add_cellassign_raw(secondary_analysis.cell_assign_rdata)
    """ Differential Expression """
    if config.run_de:
        other_samples = []
        # NOTE(review): ``compare`` is not defined in this function, and
        # ``exit(0)`` terminates the process on the first iteration, so
        # ``run_de`` is never reached. Looks like leftover debugging; confirm.
        for other_sample in compare:
            print("blah")
            exit(0)
            secondary_analysis.run_de(other_sample)
    """ CellAssign """
    if config.run_cellassign:
        tenx = TenxAnalysis(tenx_analysis)
        if hasattr(config, "rho_matrix"):
            # NOTE(review): ``eval`` executes the file's contents as Python;
            # only safe if the rho matrix file is trusted.
            rho_matrix = eval(open(config.rho_matrix, "r").read())
        elif hasattr(config, "tissue"):
            sce = SingleCellExperiment.fromRData(secondary_analysis.sce)
            # NOTE(review): the branch tests for ``config.tissue`` but reads
            # ``config.organ``; confirm which attribute is intended.
            rho_matrix = generate_json(tenx, sce, config.organ)
        else:
            raise AssertionError("Not implemented.")
        # NOTE(review): ``combine_assign`` is not defined in this function.
        secondary_analysis.run_cell_assign(rho_matrix,
                                           tenx_analysis,
                                           additional=combine_assign)
        results.add_cellassign_pkl(secondary_analysis.cell_assign_fit)
        results.add_cellassign_raw(secondary_analysis.cell_assign_rdata)
        path = secondary_analysis.plot_cell_types()
        results.add_plot(path, "Cell Type Frequency")
        path = secondary_analysis.plot_cell_type_by_cluster(tenx_analysis)
        results.add_plot(path, "Cell Type by Cluster")
        path = secondary_analysis.plot_tsne_by_cell_type()
        results.add_plot(path, "TSNE by Cell Type")
        path = secondary_analysis.plot_pca_by_cell_type()
        results.add_plot(path, "PCA by Cell Type")
        # path = secondary_analysis.plot_umap_by_cell_type()
        # results.add_plot(path, "UMAP by Cell Type")
        path1, path2 = secondary_analysis.marker_analysis(tenx, rho_matrix)
        results.add_plot(path1, "Heat Marker Gene Matrix")
        results.add_plot(path2, "Stacked Vin Marker Gene Matrix")
    """ SCVis """
    if config.run_scvis:
        secondary_analysis.run_scviz(config.perplexity, config.components)
    """ CloneAlign """
    if config.run_clonealign and config.copy_number_data is not None and config.clone_assignments is not None:
        secondary_analysis.run_clone_align(tenx, config.copy_number_data,
                                           config.clone_assignments)
    if config.plot_scvis:
        # The embedding path is derived from the perplexity and the number
        # of components; the remaining settings are fixed in the file name.
        embedding_file = "{0}_{1}/perplexity_{0}_regularizer_0.001_batch_size_512_learning_rate_0.01_latent_dimension_2_activation_ELU_seed_1_iter_3000.tsv".format(
            config.perplexity, config.components)
        path = secondary_analysis.plot_scvis_by_cluster(tenx_analysis,
                                                        embedding_file,
                                                        pcs=config.components)
        path = os.path.join(output, path)
        results.add_plot(path, "SCVis by Cluster")
        # NOTE(review): ``config.run_cellassign`` is used as a flag above
        # but as a filesystem path here; confirm the intended check.
        if os.path.exists(config.run_cellassign):
            path = secondary_analysis.plot_scvis_by_cell_type(
                embedding_file, pcs=config.components)
            results.add_plot(path, "SCVIS by Cell Type")
    """ Cluster Analysis """
    if config.clustering:
        path = secondary_analysis.plot_pca_by_cluster(tenx_analysis,
                                                      pcs=config.components)
        results.add_plot(path, "PCA by Cluster")
        path = secondary_analysis.plot_tsne_by_cluster(tenx_analysis,
                                                       pcs=config.components)
        results.add_plot(path, "TSNE by Cluster")
        path = secondary_analysis.plot_umap_by_cluster(tenx_analysis,
                                                       pcs=config.components)
        results.add_plot(path, "UMAP by Cluster")
        # For each representation: schedule the marker plots, then register
        # whatever matching PNGs already exist, titled from the file name.
        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="PCA",
                                                pcs=config.components)
        pca_cluster_markers = glob.glob("figures/expression/*pca*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)
        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="TSNE",
                                                pcs=config.components)
        pca_cluster_markers = glob.glob("figures/expression/*tsne*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)
        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="UMAP",
                                                pcs=config.components)
        pca_cluster_markers = glob.glob("figures/expression/*umap*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)
        embedding_file = "{0}_{1}/perplexity_{0}_regularizer_0.001_batch_size_512_learning_rate_0.01_latent_dimension_2_activation_ELU_seed_1_iter_3000.tsv".format(
            config.perplexity, config.components)
        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="SCVIS",
                                                pcs=config.components,
                                                embedding_file=embedding_file)
        # NOTE(review): the pattern hard-codes ``5_50`` although the
        # embedding file name uses config.perplexity / config.components.
        pca_cluster_markers = glob.glob("figures/expression/*scvis_5_50*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)
    """ Gene Level """
    """ Reporting """
    # Both reporting transforms are guarded by the same ``config.report``.
    if config.report:
        workflow.transform(name="{}_markdown".format(prefix),
                           func=exportMD,
                           args=(results, ))
    if config.report:
        workflow.transform(name="{}_finalize".format(prefix),
                           func=exportFinalize,
                           args=(results, ))
    workflow = secondary_analysis.get_workflow()
    return workflow
def plot_by_markers(rdata,
                    tenx_analysis,
                    genes,
                    prefix,
                    rep,
                    pcs,
                    embedding_file=None,
                    k=12):
    """Draw a grid of embeddings, one panel per marker gene.

    The first ``k`` genes are plotted on the ``rep`` embedding, coloured by
    min-max scaled ``logcounts``. The figure is saved to
    ``figures/expression/<prefix>_counts_<rep>.png`` with spaces in the
    prefix replaced by underscores and everything lower-cased.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx_analysis: Argument handed to ``TenxAnalysis``.
        genes: Sliceable sequence of gene names that must be present in
            ``tenx.get_genes(sce)``.
        prefix: Label used in the output file name.
        rep: Reduced-dimension name; ``"SCVIS"`` reads ``embedding_file``
            instead of the SCE.
        pcs: Unused; kept for interface compatibility.
        embedding_file: SCVIS embedding, only read when ``rep == "SCVIS"``.
        k: Maximum number of genes to plot. The grid has four columns and
            at least three rows, growing when more than 12 genes are drawn.
    """
    genes = list(genes[:k])
    sce = SingleCellExperiment.fromRData(rdata)
    counts = sce.assays["logcounts"].toarray()
    tenx = TenxAnalysis(tenx_analysis)
    if rep == "SCVIS":
        tsne_dims = tenx.get_scvis_dimensions(embedding_file)
    else:
        tsne_dims = sce.getReducedDims(rep)
    all_genes = tenx.get_genes(sce)
    barcodes = sce.colData["Barcode"]
    x_coded = dict(zip(barcodes, tsne_dims[0]))
    y_coded = dict(zip(barcodes, tsne_dims[1]))
    os.makedirs("figures/expression", exist_ok=True)
    x = []
    y = []
    fig = plt.figure()
    try:
        # Positions of barcodes without coordinates; a set keeps the
        # per-gene filtering below linear in the number of cells.
        drop_rows = set()
        for position, barcode in enumerate(barcodes):
            try:
                x_val = x_coded[barcode]
                y_val = y_coded[barcode]
            except KeyError:
                drop_rows.add(position)
                continue
            x.append(x_val)
            y.append(y_val)
            print("Barcode {}".format(barcode))
        scale = preprocessing.MinMaxScaler()
        counts = scale.fit_transform(counts)
        # Same 3x4 grid as before for up to 12 genes; taller beyond that.
        n_rows = max(3, -(-len(genes) // 4))
        for panel, gene in enumerate(genes):
            expression = counts[all_genes.index(gene)]
            plt.subplot(n_rows, 4, panel + 1)
            expression = scale.fit_transform(
                numpy.array(expression).reshape(-1, 1))
            expression = [
                value for row, value in enumerate(expression.flatten())
                if row not in drop_rows
            ]
            print(len(expression))
            print(len(barcodes))
            print(len(x))
            print(len(y))
            g = sns.scatterplot(x=x,
                                y=y,
                                hue=expression,
                                palette="RdYlBu_r",
                                alpha=0.7,
                                legend=False,
                                s=4)
            g.set(xticklabels=[])
            g.set(yticklabels=[])
            plt.title(gene)
            print("Gene {}".format(gene))
        plt.tight_layout()
        plt.savefig("figures/expression/{}_counts_{}.png".format(
            prefix.replace(" ", "_").lower(), rep.lower()))
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)
def umap_by_cluster_markers(rdata, tenx_analysis, prefix, pcs):
    """Compute cluster markers with ``rep="UMAP"`` and print the result keys.

    ``prefix`` is accepted but not used.
    """
    analysis = TenxAnalysis(tenx_analysis)
    analysis.load()
    experiment = SingleCellExperiment.fromRData(rdata)
    ranked = analysis.markers_by_clusters(experiment, rep="UMAP", pcs=pcs)
    print(ranked.keys())
class Results(object):
    """Registry of report artefacts and the copies needed to publish them.

    Each ``add_*`` method records a ``(source, destination)`` pair in
    ``self.paths``, where the destination lives in the report directory,
    and stores the artefact's base name for the report templates. Nothing
    is copied until ``finalize`` is called.
    """

    def __init__(self, output):
        """Create the report directory ``<jobpath>/<prefix>_report/``.

        NOTE(review): ``output`` is accepted but not used; both the output
        and report locations are taken from ``config.jobpath``.
        """
        self.plots = list()
        self.output = config.jobpath
        self.report_dir = os.path.join(config.jobpath,
                                       "{}_report/".format(config.prefix))
        try:
            os.makedirs(self.report_dir)
        except OSError:
            # Best effort: the directory may already exist.
            pass
        self.paths = []

    def _queue_copy(self, source):
        """Schedule ``source`` for copying into the report directory.

        Returns the base name of ``source``, which is also the name of the
        file inside the report directory.
        """
        name = os.path.split(source)[1]
        self.paths.append((source, os.path.join(self.report_dir, name)))
        return name

    def qc_reports(self):
        """Yield the FastQC HTML reports found under the output directory."""
        for html in glob.glob(os.path.join(self.output, "fastqc/*/*.html")):
            yield html

    def add_analysis(self, tenx):
        """Register the summary of the analysis at ``tenx``.

        The summary is scheduled to be copied to ``summary.html``.
        """
        self.analysis = TenxAnalysis(tenx)
        summary = self.analysis.summary()
        dest = os.path.join(self.report_dir, "summary.html")
        self.paths.append((summary, dest))
        self.summary = "summary.html"

    def add_workflow(self, script):
        """Register the workflow script; its base name is kept in ``script``."""
        self.script = self._queue_copy(script)

    def add_sce(self, sce):
        """Register the SCE file; its base name is kept in ``sce``."""
        self.sce = self._queue_copy(sce)

    def add_cellassign_pkl(self, pkl):
        """Register the CellAssign pickle; its base name is kept in ``pkl``."""
        self.pkl = self._queue_copy(pkl)

    def add_cellassign_raw(self, raw):
        """Register the raw CellAssign output; its base name is kept in ``raw``."""
        self.raw = self._queue_copy(raw)

    def add_plot(self, path, header, desc=""):
        """Register a plot with the header and description shown for it."""
        self.plots.append({
            "path": self._queue_copy(path),
            "header": header,
            "desc": desc,
        })

    def finalize(self):
        """Copy every registered artefact into the report directory.

        Copies are best effort: an artefact that cannot be copied (for
        example because it was never produced) is skipped.
        """
        for source, dest in self.paths:
            try:
                shutil.copyfile(source, dest)
            except Exception:
                continue

    def barcode_to_celltype(self):
        """Write a ``barcode<TAB>cell type`` table and return its path.

        The pickle is loaded before the TSV is opened, so a missing or
        unreadable pickle no longer leaves a truncated TSV behind.

        NOTE(review): ``add_cellassign_pkl`` stores only the base name in
        ``self.pkl``, so the pickle is opened relative to the current
        working directory. Confirm that is intended.
        """
        tsv = os.path.join(self.output, "barcode_to_celltype.tsv")
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        with open(self.pkl, "rb") as handle:
            cell_assignments = pickle.load(handle)
        with open(tsv, "w") as output:
            for barcode, ctype in zip(cell_assignments["Barcode"],
                                      cell_assignments["cell_type"]):
                output.write("{}\t{}\n".format(barcode, ctype))
        return tsv
def add_analysis(self, tenx):
    """Register the summary of the analysis at ``tenx`` for the report.

    The ``TenxAnalysis`` is kept on ``self.analysis`` and its summary is
    scheduled to be copied to ``summary.html`` in the report directory.
    """
    summary_name = "summary.html"
    analysis = TenxAnalysis(tenx)
    self.analysis = analysis
    summary_source = analysis.summary()
    self.paths.append(
        (summary_source, os.path.join(self.report_dir, summary_name)))
    self.summary = summary_name