def test_symbol_retrieve(self):
    """Print the row-data keys of SCEs obtained through each loader.

    Smoke test only: nothing is asserted, so it fails solely when one of the
    loaders (or the attribute accesses) raises.
    """
    # Loaded through TenX.read10xCounts from a TenxAnalysis.
    analysis = TenxAnalysis("tests/pre_igo")
    counts_sce = TenX.read10xCounts(analysis)
    print(counts_sce.rowData.keys())

    # Loaded from an RData fixture.
    sce_path = os.path.join(base_dir, "tests/example_sce.rda")
    rdata_sce = SingleCellExperiment.fromRData(sce_path)
    print(rdata_sce.rowData.keys())

    # Loaded from the RS4 result of DropletUtils.read10xCounts.
    droplet_utils = DropletUtils()
    rs4 = droplet_utils.read10xCounts("tests/hg19/")
    rs4_sce = SingleCellExperiment.fromRS4(rs4)
    print(rs4_sce.rowData.keys())

    # Loaded from the copy-number RData fixture; also dump the dimnames.
    copy_number_path = os.path.join(base_dir, "tests/example_copy_number.rda")
    copy_number_sce = SingleCellExperiment.fromRData(copy_number_path)
    print(copy_number_sce.rowData.keys())
    print(copy_number_sce.rownames)
    print(copy_number_sce.colnames)
def scvis_by_cluster(rdata, tenx, prefix, pcs, embedding_file):
    """Scatter-plot an embedding read from a TSV file, coloured by cluster.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx: Object providing ``clusters(sce, pcs=...)``; the result is
            indexed by barcode.
        prefix: Directory the figure is written into; also shown in the title.
        pcs: Forwarded to ``tenx.clusters``.
        embedding_file: Tab-separated file with one header line. The first
            column of every data row is skipped and the remaining columns are
            parsed as floats; the first two are used as x and y.

    Rows of the embedding are paired with ``sce.colData["Barcode"]`` by
    position, so both must be in the same order.
    """
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce, pcs=pcs)

    with open(embedding_file, "r") as handle:
        rows = handle.read().splitlines()
    # rows[0] is the header line.
    dims = [list(map(float, row.split("\t")[1:])) for row in rows[1:]]

    barcodes = sce.colData["Barcode"]
    print(dims)

    x = []
    y = []
    clusters = []
    for barcode, dim in zip(barcodes, dims):
        x.append(dim[0])
        y.append(dim[1])
        clusters.append("Cluster {}".format(cluster_labels[barcode]))

    _, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    ax.set_title("SCVIS - Clusters - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    # The template used to be "{}}/..." whose lone "}" makes str.format raise
    # ValueError, so the figure was never written.
    plt.savefig("{}/svis_by_cluster.png".format(prefix))
def test_call_empty_drops(self):
    """Run ``TenX.emptyDrops`` on the example counts assay and print its keys.

    Smoke test only: nothing is asserted.
    """
    fixture = os.path.join(base_dir, "tests/example_sce.RData")
    counts = SingleCellExperiment.fromRData(fixture).assays["counts"]
    result = TenX().emptyDrops(counts)
    print(result.keys())
def pca_by_cluster(rdata, tenx, prefix, pcs):
    """Scatter-plot the first two PCA dimensions, coloured by cluster.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx: Object providing ``clusters(sce, pcs=...)``, which must return a
            mapping of barcode to cluster label (it is iterated with
            ``.items()``).
        prefix: Directory ``pca_by_cluster.png`` is written into; also shown
            in the title.
        pcs: Forwarded to ``tenx.clusters`` and to
            ``sce.getReducedDims("PCA", n=...)``.
    """
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce, pcs=pcs)
    pca_dims = sce.getReducedDims("PCA", n=pcs)
    barcodes = sce.colData["Barcode"]
    x_coded = dict(zip(barcodes, pca_dims[0]))
    y_coded = dict(zip(barcodes, pca_dims[1]))

    x = []
    y = []
    clusters = []
    for barcode, cluster in cluster_labels.items():
        # Best effort: clustered barcodes without coordinates are left out
        # of the plot rather than aborting it.
        try:
            x_val = x_coded[barcode]
            y_val = y_coded[barcode]
        except KeyError:
            continue
        x.append(x_val)
        y.append(y_val)
        clusters.append("Cluster {}".format(cluster))

    _, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    ax.set_title("PCA - Clusters - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("{}/pca_by_cluster.png".format(prefix))
def scvis_by_cell_type(rdata, cell_assign_fit, prefix, embedding_file):
    """Scatter-plot an embedding read from a TSV file, coloured by cell type.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        cell_assign_fit: Path to a pickle holding ``"Barcode"`` and
            ``"cell_type"`` sequences.
        prefix: Used in the title and in the output file name.
        embedding_file: Tab-separated file with one header line. The first
            column of every data row is skipped and the remaining columns are
            parsed as floats; the first two are used as x and y.

    Barcodes without an entry in the fit are labelled ``"Other"``. The figure
    is written to ``figures/scvis_by_cell_type_<prefix>.png``.
    """
    # NOTE: pickle.load can execute arbitrary code; only pass fit files that
    # this pipeline produced itself.
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)
    sce = SingleCellExperiment.fromRData(rdata)
    barcodes = sce.colData["Barcode"]
    count = len(barcodes)
    # Only the first len(barcodes) entries of the fit are considered.
    cell_types = dict(zip(fit["Barcode"][:count], fit["cell_type"][:count]))

    with open(embedding_file, "r") as handle:
        rows = handle.read().splitlines()
    # rows[0] is the header line.
    dims = [list(map(float, row.split("\t")[1:])) for row in rows[1:]]

    x = []
    y = []
    clusters = []
    # Embedding rows are matched to barcodes by position.
    for barcode, dim in zip(barcodes, dims):
        clusters.append(cell_types.get(barcode, "Other"))
        x.append(dim[0])
        y.append(dim[1])

    _, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    ax.set_title("SCVIS - Cell Type - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("figures/scvis_by_cell_type_{}.png".format(prefix))
def test_clone_align(self):
    """Smoke-test ``CloneAlign.run`` on the example SCE fixture.

    Nothing is asserted; the test fails only if loading or running raises.
    """
    example_rda = os.path.join(base_dir, "tests/example_sce.rda")
    # Path of the clonealign fit fixture. It is resolved against base_dir
    # (joining it onto example_rda, a file path, can never name a real file).
    # NOTE(review): the fixture is not used by this test yet.
    example_clonealign_fit = os.path.join(
        base_dir, "tests/example_clonealign_fit.rda")
    sce = SingleCellExperiment.fromRData(example_rda)
    clonealigner = CloneAlign()
    clonealigner.run(sce)
def tsne_by_cluster(rdata, tenx, prefix, pcs):
    """Plot the TSNE embedding by cluster and dump it as JSON.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx: Object providing ``clusters(sce, pcs=...)``, which must return a
            mapping of barcode to cluster label.
        prefix: Directory all three outputs are written into; also shown in
            the title.
        pcs: Forwarded to ``tenx.clusters``.

    Writes ``tsne_embedding.json`` (barcode -> [x, y]),
    ``tsne_clusters.json`` (barcode -> cluster) and ``tsne_by_cluster.png``
    under ``prefix``.

    Raises:
        KeyError: If a clustered barcode has no TSNE coordinates.
    """
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce, pcs=pcs)
    tsne_dims = sce.reducedDims["TSNE"]
    barcodes = sce.colData["Barcode"]
    # Reshaped so that row 0 holds the x values and row 1 the y values.
    tsne_dims = numpy.array(tsne_dims).reshape(2, len(barcodes))
    x_coded = dict(zip(barcodes, tsne_dims[0]))
    y_coded = dict(zip(barcodes, tsne_dims[1]))

    x = []
    y = []
    clusters = []
    embedding = {}
    labels = {}
    for barcode, cluster in cluster_labels.items():
        clusters.append("Cluster {}".format(cluster))
        x.append(x_coded[barcode])
        y.append(y_coded[barcode])
        embedding[barcode] = (x_coded[barcode], y_coded[barcode])
        labels[barcode] = cluster

    # Serialise before opening so a failure does not leave an empty file, and
    # use context managers so the handles are closed even on error.
    embedding_str = json.dumps(embedding)
    with open("{}/tsne_embedding.json".format(prefix), "w") as output:
        output.write(embedding_str)
    clusters_str = json.dumps(labels)
    with open("{}/tsne_clusters.json".format(prefix), "w") as output:
        output.write(clusters_str)

    _, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    ax.set_title("TSNE - Clusters - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("{}/tsne_by_cluster.png".format(prefix))
def run(tenx, rdata, copy_number_data, clone_assignments, assay="counts", run_cmd=True):
    """Run clonealign for the SCE stored at ``rdata``.

    Args:
        tenx: Object providing ``get_genes(sce)``, ``gene_map(sce)`` and
            ``create_scanpy_adata(sce)``.
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        copy_number_data: Forwarded to ``CloneAlign.run_command_line``.
        clone_assignments: Forwarded to ``CloneAlign.run_command_line``.
        assay: Name that must be present in ``sce.assayNames``.
        run_cmd: When true, go through ``CloneAlign.run_command_line``;
            otherwise call the R ``clonealign`` package via ``importr``.

    Returns:
        None. The fit is bound to the local ``cal`` but never returned.

    Raises:
        AssertionError: If ``assay`` is not in ``sce.assayNames``.
        ValueError: If ``rdata/clone_align.rdata`` is still missing after the
            command-line run.
    """
    sce = SingleCellExperiment.fromRData(rdata)
    _genes = tenx.get_genes(sce)
    convert = tenx.gene_map(sce)
    # Translate each gene through the map, keeping genes without an entry.
    genes = []
    for gene in _genes:
        if gene in convert:
            genes.append(convert[gene])
        else:
            genes.append(gene)
    adata = tenx.create_scanpy_adata(sce)
    # Transposed expression matrix; only the in-process branch uses it.
    matrix = adata.X.T
    assert assay in sce.assayNames, "Assay not present in SCE."
    if run_cmd:
        print("Calling CMD Clone Align.")
        # Skip the run when a previous result is already on disk.
        if not os.path.exists("rdata/clone_align.rdata"):
            CloneAlign.run_command_line(adata, copy_number_data, clone_assignments, genes)
        if not os.path.exists("rdata/clone_align.rdata"):
            raise ValueError("Rscript 'run_clonealign.r' Failed.")
        # NOTE(review): the file checked above is rdata/clone_align.rdata but
        # the one read here is rdata/cell_assign_fit.rdata -- confirm which
        # path is intended.
        cal = r.readRDS("rdata/cell_assign_fit.rdata")
    else:
        CloneAlignInterface = importr("clonealign")
        # NOTE(review): `cnv_data`, `clone_align_fit` and `filename` are not
        # defined in this function; unless they exist at module level this
        # branch raises NameError. Presumably `copy_number_data`, `cal` and an
        # output path were meant -- confirm before relying on run_cmd=False.
        cal = CloneAlignInterface.clonealign(matrix, cnv_data)
        robjects.r.assign("clone_align_fit", clone_align_fit)
        robjects.r("saveRDS(clone_align_fit, file='{}')".format(filename))
def pca_by_cell_type(rdata, cell_assign_fit, prefix):
    """Scatter-plot the first two PCA dimensions, coloured by cell type.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        cell_assign_fit: Path to a pickle holding ``"Barcode"`` and
            ``"cell_type"`` sequences.
        prefix: Shown in the plot title only.

    The figure is always written to ``figures/pca_by_celltype.png``.
    """
    sce = SingleCellExperiment.fromRData(rdata)
    # NOTE: pickle.load can execute arbitrary code; only pass fit files that
    # this pipeline produced itself.
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)
    pca_dims = sce.getReducedDims("PCA")
    barcodes = sce.colData["Barcode"]
    count = len(barcodes)
    # Only the first len(barcodes) entries of the fit are considered.
    cell_types = dict(zip(fit["Barcode"][:count], fit["cell_type"][:count]))
    x_coded = dict(zip(barcodes, pca_dims[0]))
    y_coded = dict(zip(barcodes, pca_dims[1]))

    x = []
    y = []
    clusters = []
    for barcode, cell_type in cell_types.items():
        # Best effort: fitted barcodes without coordinates are left out.
        try:
            x_val = x_coded[barcode]
            y_val = y_coded[barcode]
        except KeyError:
            continue
        clusters.append(cell_type)
        x.append(x_val)
        y.append(y_val)

    _, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    ax.set_title("PCA - Cell Type - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("figures/pca_by_celltype.png")
def umap_by_gene(rdata, gene, prefix, pcs):
    """Scatter-plot the UMAP embedding, coloured by the expression of ``gene``.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        gene: Column selector used as ``adata[:, gene]``; also used in the
            output file name.
        prefix: Shown in the plot title only.
        pcs: Unused.

    The figure is written to ``figures/umap_by_<gene>.png``.
    """
    # NOTE(review): `tenx_analysis` is not a parameter of this function, so it
    # must exist at module level or this line raises NameError -- confirm.
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    sce = SingleCellExperiment.fromRData(rdata)
    umap_dims = sce.reducedDims["UMAP"]
    barcodes = sce.colData["Barcode"]
    transcripts = sce.rowData["Symbol"]
    # Pass the symbols read above; the previous `symbols` name was undefined.
    adata = tenx.create_scanpy_adata(barcodes=barcodes, transcripts=transcripts)
    assert len(barcodes) == len(adata[:, gene])
    expression = dict(zip(barcodes, adata[:, gene]))
    # Reshaped so that row 0 holds the x values and row 1 the y values.
    umap_dims = numpy.array(umap_dims).reshape(2, len(barcodes))
    x_coded = dict(zip(barcodes, umap_dims[0]))
    y_coded = dict(zip(barcodes, umap_dims[1]))

    x = []
    y = []
    values = []
    for barcode in barcodes:
        values.append(float(expression[barcode]))
        x.append(x_coded[barcode])
        y.append(y_coded[barcode])

    _, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=values, alpha=0.85)
    # NOTE(review): the title still says "PCA - Clusters" although this plots
    # UMAP coloured by gene expression -- looks like a copy/paste leftover.
    ax.set_title("PCA - Clusters - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("figures/umap_by_{}.png".format(gene))
def test_raw_assay_type_equivelence(self):
    """Check that the counts assay has the same type from RData and from RS4."""
    rdata = os.path.join(base_dir, "tests/example_sce.RData")
    sce_from_rdata = SingleCellExperiment.fromRData(rdata)
    tenx = DropletUtils()
    rs4_results = tenx.read10xCounts("/home/ceglian/data/raw_gene_bc_matrices/hg19/")
    # NOTE(review): other tests build this with SingleCellExperiment.fromRS4;
    # confirm that calling the constructor directly is equivalent.
    sce_from_rs4 = SingleCellExperiment(rs4_results)
    # Compare the two sources with each other; the assertion previously
    # compared the RData assay with itself and could never fail.
    self.assertEqual(type(sce_from_rdata.assays["counts"]),
                     type(sce_from_rs4.assays["counts"]))
def adata(self, scepath, subset=None):
    """Build a scanpy AnnData from the SCE stored at ``scepath``.

    Args:
        scepath: Path to the RData file; ``None`` falls back to ``self.rdata``.
        subset: Forwarded to ``self.create_scanpy_adata``.

    Returns:
        Whatever ``self.create_scanpy_adata`` returns for the loaded SCE.
    """
    source = self.rdata if scepath is None else scepath
    resolved = os.path.abspath(source)
    print(resolved)
    experiment = SingleCellExperiment.fromRData(resolved)
    return self.create_scanpy_adata(experiment, subset=subset)
def plot_by_genes(rdata, tenx_analysis, genes, prefix, rep, pcs):
    """Write one expression-coloured scatter plot per gene.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx_analysis: Argument for the ``TenxAnalysis`` constructor.
        genes: Iterable of genes; each is used as ``adata[barcode, gene]`` and
            in the output file name.
        prefix: Unused.
        rep: Name handed to ``sce.getReducedDims``; its first two rows are
            used as x and y.
        pcs: Unused.

    Figures are written to ``figures/expression/expression_<gene>.png``.
    """
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    sce = SingleCellExperiment.fromRData(rdata)
    dims = sce.getReducedDims(rep)
    barcodes = sce.colData["Barcode"]
    transcripts = sce.rowData["Symbol"]
    adata = tenx.create_scanpy_adata(barcodes=barcodes, transcripts=transcripts)
    x_coded = dict(zip(barcodes, dims[0]))
    y_coded = dict(zip(barcodes, dims[1]))

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("figures/expression", exist_ok=True)

    # The coordinates are the same for every gene, so build them once.
    x = [x_coded[barcode] for barcode in barcodes]
    y = [y_coded[barcode] for barcode in barcodes]

    for gene in genes:
        expression = [float(adata[barcode, gene].X) for barcode in barcodes]
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.scatterplot(x=x, y=y, hue=expression, alpha=0.85)
        ax.set_title("{} Counts".format(gene))
        ax.legend()
        plt.tight_layout()
        plt.savefig("figures/expression/expression_{}.png".format(gene))
        # Release the figure; otherwise one stays open per gene.
        plt.close(fig)
def test_assay_names_rdata(self):
    """Every assay in the example SCE must be one of the expected names."""
    fixture = os.path.join(base_dir, "tests/example_sce.RData")
    loaded = SingleCellExperiment.fromRData(fixture)
    expected_assays = [
        'BatchCellMeans', 'BaseCellMeans', 'BCV', 'CellMeans', 'TrueCounts',
        'counts'
    ]
    # One-directional check: expected names that are absent do not fail.
    for name in list(loaded.assays.keys()):
        self.assertTrue(name in expected_assays)
def umap_by_cluster(rdata, cluster_labels, sampleid, directory):
    """Plot the UMAP embedding by cluster into ``directory``.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        cluster_labels: Forwarded to ``reduced_dims_by_cluster``.
        sampleid: Unused.
        directory: Directory that receives ``umap_by_cluster.png``.
    """
    experiment = SingleCellExperiment.fromRData(rdata)
    raw_umap = experiment.reducedDims["UMAP"]
    cell_barcodes = experiment.colData["Barcode"]
    # Two rows, one column per barcode.
    coordinates = numpy.array(raw_umap).reshape(2, len(cell_barcodes))
    figure_path = os.path.join(directory, "umap_by_cluster.png")
    reduced_dims_by_cluster(cluster_labels, coordinates, cell_barcodes,
                            figure_path, "UMAP")
def test_cell_assign_em(self):
    """Smoke-test ``CellAssign.run_em`` with a small marker matrix.

    Nothing is asserted; the test fails only if loading or running raises.
    """
    marker_genes = [
        "Gene161", "Gene447", "Gene519", "Gene609", "Gene677", "Gene750",
        "Gene754", "Gene860", "Gene929", "Gene979"
    ]
    fixture = os.path.join(base_dir, "tests/cell_assign_test.RData")
    experiment = SingleCellExperiment.fromRData(fixture)
    assigner = CellAssign()
    marker_matrix = GeneMarkerMatrix(genes=marker_genes,
                                     cells=["Groups1", "Groups2"])
    res = assigner.run_em(experiment, marker_matrix)
def scvis_by_cluster_markers(rdata, tenx_analysis, prefix, pcs, embedding_file):
    """Run ``markers_by_clusters`` on the SCVIS representation, best effort.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx_analysis: Argument for the ``TenxAnalysis`` constructor.
        prefix: Unused.
        pcs: Forwarded to ``markers_by_clusters``.
        embedding_file: Forwarded to ``markers_by_clusters``.

    Returns:
        None, whether or not the computation succeeded. Failures are logged
        instead of being raised so a pipeline run is not aborted.
    """
    import logging

    try:
        tenx = TenxAnalysis(tenx_analysis)
        tenx.load()
        sce = SingleCellExperiment.fromRData(rdata)
        tenx.markers_by_clusters(
            sce, rep="SCVIS", pcs=pcs, embedding_file=embedding_file)
    except Exception:
        # Deliberately non-fatal, but leave a trace of what went wrong.
        logging.getLogger(__name__).exception(
            "SCVIS cluster marker analysis failed for %s", rdata)
        return
def cluster_markers(rdata, tenx_analysis, rep, pcs, embedding_file, prefix):
    """Plot the ranked marker genes of every cluster on representation ``rep``.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData`` and to
            ``plot_by_markers``.
        tenx_analysis: Argument for ``TenxAnalysis``; also forwarded to
            ``plot_by_markers``.
        rep: Representation forwarded to ``plot_by_markers``. The markers
            themselves are always computed with ``rep="PCA"``.
        pcs: Forwarded to ``markers_by_clusters`` and ``plot_by_markers``.
        embedding_file: Forwarded to ``plot_by_markers``.
        prefix: Appended to each per-cluster plot prefix.
    """
    analysis = TenxAnalysis(tenx_analysis)
    analysis.load()
    experiment = SingleCellExperiment.fromRData(rdata)
    ranked = analysis.markers_by_clusters(experiment, rep="PCA", pcs=pcs)
    # Transpose the names table so each tuple holds one cluster's genes.
    genes_per_cluster = list(zip(*ranked["rank_genes_groups"]["names"]))
    for index, cluster_genes in enumerate(genes_per_cluster):
        cluster_prefix = "Cluster {} {}".format(index, prefix)
        plot_by_markers(rdata, tenx_analysis, cluster_genes, cluster_prefix,
                        rep, pcs, embedding_file)
def test_save_and_load_rdata(self):
    """Round-trip an SCE through ``save`` / ``fromRData`` and print its shape.

    Smoke test only: nothing is asserted.
    """
    saved_path = "tests/sce_1.rdata"

    print("Reading")
    droplet_utils = DropletUtils()
    rs4 = droplet_utils.read10xCounts("/home/ceglian/data/raw_gene_bc_matrices/hg19/")
    original = SingleCellExperiment.fromRS4(rs4)

    print("Writing")
    original.save(saved_path)

    print("Loading...")
    restored = SingleCellExperiment.fromRData(saved_path)
    print(restored.assays["counts"].shape)
def test_expression_normalization(self):
    """CPM, TPM and FPKM results must keep the shape of the counts assay."""
    fixture = os.path.join(base_dir, "tests/example_sce.RData")
    counts = SingleCellExperiment.fromRData(fixture).assays["counts"]
    normalizer = TenX()
    # All three are computed before the first shape check runs.
    normalized = (
        normalizer.calculateCPM(counts),
        normalizer.calculateTPM(counts),
        normalizer.calculateFPKM(counts),
    )
    for matrix in normalized:
        assert matrix.shape == counts.shape
def exportRData(rdata, directory, delim="\t"):
    """Dump an SCE's metadata and assays as text files into ``directory``.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        directory: Output directory; created if missing.
        delim: Field separator passed to ``to_csv`` for the assay files.

    Writes ``meta.txt`` (the ``str()`` of ``sizeFactors`` and ``reducedDims``)
    and one ``<assay>.csv`` per entry of ``sce.assayNames``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)
    sce = SingleCellExperiment.fromRData(rdata)

    # The context manager guarantees the metadata is flushed and the handle
    # closed; it was previously left open.
    with open(os.path.join(directory, "meta.txt"), "w") as output:
        output.write("sizeFactors: " + str(sce.sizeFactors) + "\n")
        output.write("reducedDims: " + str(sce.reducedDims) + "\n")

    for assay in sce.assayNames:
        filename = os.path.join(directory, "{}.csv".format(assay))
        print(filename)
        dataframe = sce.assay(assay)
        dataframe.to_csv(filename, sep=delim)
def test_clone_align(self):
    """``CloneAlign.run`` must return 200 clone entries for the example SCE."""
    fixture = os.path.join(base_dir, "tests/example_sce.rda")
    experiment = SingleCellExperiment.fromRData(fixture)
    row_data = experiment.rowData
    # Stack row-data columns A, B and C, then transpose so each becomes a
    # column of the copy-number matrix.
    stacked = [row_data[name] for name in ("A", "B", "C")]
    copy_number = numpy.transpose(numpy.array(stacked))
    print(copy_number.shape)
    aligner = CloneAlign()
    fit = aligner.run(experiment, copy_number)
    assert len(fit["clone"]) == 200
def tsne_by_cell_type(rdata, fit, sampleid, directory, known_types=None):
    """Plot the TSNE embedding by cell type into ``directory``.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        fit: Mapping with ``"Barcode"`` and ``"cell_type"`` sequences.
        sampleid: Unused.
        directory: Directory that receives ``tsne_by_cell_type.png``.
        known_types: Forwarded to ``reduced_dims_by_cell_type``.
    """
    experiment = SingleCellExperiment.fromRData(rdata)
    raw_tsne = experiment.reducedDims["TSNE"]
    cell_barcodes = experiment.colData["Barcode"]
    type_by_barcode = dict(zip(fit["Barcode"], fit["cell_type"]))
    # Two rows, one column per barcode.
    coordinates = numpy.array(raw_tsne).reshape(2, len(cell_barcodes))
    figure_path = os.path.join(directory, "tsne_by_cell_type.png")
    reduced_dims_by_cell_type(type_by_barcode,
                              coordinates,
                              cell_barcodes,
                              figure_path,
                              "TSNE",
                              known_types=known_types)
def IntegratedSummary(sce, sampleid, report):
    """Build (or reuse) the JSON summary for a sample and copy it to ``report``.

    Args:
        sce: Path handed to ``SingleCellExperiment.fromRData``.
        sampleid: Key for the sample inside the JSON and name of the cached
            file ``viz/<sampleid>.json``.
        report: Destination the JSON file is copied to.

    The JSON is only rebuilt when ``viz/<sampleid>.json`` does not exist yet.
    It holds, under ``sampleid``: ``celldata``, ``genedata``,
    ``log_count_matrix``, ``cellassign`` and ``umap``; and a top-level
    ``rho`` entry.
    """
    if not os.path.exists("viz/"):
        os.makedirs("viz")
    if not os.path.exists("viz/html/"):
        os.makedirs("viz/html/")
    if not os.path.exists("viz/{}.json".format(sampleid)):
        sce = SingleCellExperiment.fromRData(sce)
        column_data = dump_all_coldata(sce)
        patient_data = collections.defaultdict(dict)
        patient_data[sampleid]["celldata"] = column_data
        gene_data = dump_all_rowdata(sce)
        patient_data[sampleid]["genedata"] = gene_data
        logcounts = sce.assays["logcounts"].todense().tolist()
        # Sparse barcode -> symbol -> value mapping; zeros are left out.
        # Rows are paired with gene symbols and cells with barcodes by position.
        log_count_matrix = collections.defaultdict(dict)
        for symbol, row in zip(gene_data["Symbol"], logcounts):
            for barcode, cell in zip(column_data["Barcode"], row):
                if float(cell) != 0.0:
                    log_count_matrix[barcode][symbol] = cell
        patient_data[sampleid]["log_count_matrix"] = dict(log_count_matrix)
        rdims = sce.reducedDims["UMAP"]
        barcodes = sce.colData["Barcode"]
        # Reshaped so that row 0 holds the x values and row 1 the y values.
        rdims = numpy.array(rdims).reshape(2, len(barcodes))
        _celltypes = sce.colData["cell_type"]
        # Turn the dotted cell-type names into display names.
        celltypes = []
        for celltype in _celltypes:
            if celltype == "Monocyte.Macrophage":
                celltype = "Monocyte/Macrophage"
            else:
                celltype = celltype.replace(".", " ")
            celltypes.append(celltype)
        fit = dict(zip(barcodes, celltypes))
        x_coded = dict(zip(barcodes, rdims[0]))
        y_coded = dict(zip(barcodes, rdims[1]))
        coords = dict()
        for barcode, celltype in fit.items():
            # NOTE(review): int() truncates the UMAP coordinates -- confirm
            # that integer precision is intended. Any failure (for example a
            # NaN coordinate) silently drops the barcode from "umap".
            try:
                x_val = int(x_coded[barcode])
                y_val = int(y_coded[barcode])
            except Exception as e:
                continue
            coords[barcode] = (x_val, y_val)
        patient_data[sampleid]["cellassign"] = fit
        patient_data[sampleid]["umap"] = coords
        # NOTE(review): "rho" is stored at the top level, next to the sample
        # keys, not under patient_data[sampleid] -- confirm consumers expect
        # that. `config` is not defined in this function.
        patient_data["rho"] = GeneMarkerMatrix.read_yaml(
            config.rho_matrix).marker_list
        patient_data_str = json.dumps(patient_data)
        output = open("viz/{}.json".format(sampleid), "w")
        output.write(str(patient_data_str))
        output.close()
    # Runs whether the JSON was just built or already existed.
    shutil.copyfile("viz/{}.json".format(sampleid), report)
def cell_type_by_cluster(rdata, cell_assign_fit, tenx_analysis, prefix):
    """Bar-plot the cell-type composition of every cluster.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        cell_assign_fit: Path to a pickle holding ``"Barcode"`` and
            ``"cell_type"`` sequences.
        tenx_analysis: Argument for the ``TenxAnalysis`` constructor.
        prefix: Shown in the plot title only.

    For each cluster the fraction of its cells assigned to each cell type is
    plotted. The figure is always written to
    ``figures/cell_type_by_cluster.png``.
    """
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    # NOTE: pickle.load can execute arbitrary code; only pass fit files that
    # this pipeline produced itself.
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)
    cell_types = dict(zip(fit["Barcode"], fit["cell_type"]))
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce)
    # NOTE(review): this pairs barcodes with whatever iterating
    # `cluster_labels` yields. If tenx.clusters returns a barcode -> cluster
    # mapping, that is its keys rather than the labels -- confirm.
    clusters = dict(zip(sce.colData["Barcode"], cluster_labels))

    # counts[cluster][cell type] -> number of cells.
    counts = collections.defaultdict(lambda: collections.defaultdict(int))
    # Distinct cell types in first-seen order (dict used as an ordered set).
    observed_types = {}
    for barcode, cell in cell_types.items():
        # Best effort: fitted barcodes without a cluster are left out.
        if barcode not in clusters:
            continue
        counts[str(clusters[barcode])][cell] += 1
        observed_types[cell] = None

    plt.subplots(figsize=(16, 8))

    fclusters = []
    fcelltypes = []
    fpercentages = []
    for cluster, type_counts in counts.items():
        total = float(sum(type_counts.values()))
        # One row per (cluster, cell type). Emitting a row per cell, as was
        # done before, only repeated identical values and does not change the
        # bar heights.
        for cell in observed_types:
            fcelltypes.append(cell)
            fclusters.append(cluster)
            if cell in type_counts:
                fpercentages.append(float(type_counts[cell]) / total)
            else:
                fpercentages.append(0.0)

    df = pandas.DataFrame({
        "Cluster": fclusters,
        "Cell Type": fcelltypes,
        "Percentage": fpercentages
    })
    ax = sns.barplot(x="Cluster",
                     y="Percentage",
                     hue="Cell Type",
                     data=df,
                     palette="tab10")
    ax.set_title("Cell Type by Cluster - {}".format(prefix))
    plt.tight_layout()
    plt.savefig("figures/cell_type_by_cluster.png")
def create_input_files(rdata, components, output):
    """Write the leading slice of the PCA embedding to ``matrix.tsv``.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        components: Number of components requested from
            ``getReducedDims("PCA", n=...)``; also the slice length and the
            number of ``PC_<i>`` header columns.
        output: Directory that receives ``matrix.tsv``.

    Returns:
        Path of the written file.
    """
    experiment = SingleCellExperiment.fromRData(rdata)
    pca = experiment.getReducedDims("PCA", n=components)
    # The embedding is cut into slices of `components` entries, but only the
    # first slice is written out.
    slices = [
        pca[start:start + components]
        for start in range(0, len(pca), components)
    ]
    matrix = numpy.array(slices[0])
    print(matrix.shape)
    column_header = "\t".join(
        "PC_{}".format(index) for index in range(components))
    target = os.path.join(output, "matrix.tsv")
    numpy.savetxt(target, matrix, delimiter="\t", header=column_header)
    return target
def qcd_sce(self):
    """Load the SCE stored at ``self.qcdrdata``.

    The code that used to create that file when it was missing is disabled
    and kept below for reference, so the file presumably has to exist
    already -- confirm with callers.
    """
    # if not os.path.exists(self.qcdrdata):
    #     qc = QualityControl(self)
    #     qc.build()
    #     qc.filter()
    #     # TenX.read10xCountsFiltered(self,self.rdata)
    #     # rscript = ScaterCode(self.directory).generate_script()
    #     # cwd = os.getcwd()
    #     # os.chdir(self.directory)
    #     # print(os.getcwd())
    #     # cmd = ["Rscript",os.path.split(rscript)[-1],self.rdata,self.qcdrdata]
    #     # subprocess.call(cmd)
    #     # os.chdir(cwd)
    # print (self.qcdrdata)
    qcd_path = self.qcdrdata
    return SingleCellExperiment.fromRData(qcd_path)
def umap_by_cluster(rdata, tenx, prefix, pcs):
    """Scatter-plot the UMAP embedding, coloured by cluster.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx: Object providing ``clusters(sce, pcs=...)``, which must return a
            mapping of barcode to cluster label.
        prefix: Directory ``umap_by_cluster.png`` is written into; also shown
            in the title.
        pcs: Forwarded to ``tenx.clusters``.

    Raises:
        KeyError: If a clustered barcode has no UMAP coordinates.
    """
    experiment = SingleCellExperiment.fromRData(rdata)
    assignments = tenx.clusters(experiment, pcs=pcs)
    raw_umap = experiment.reducedDims["UMAP"]
    cell_barcodes = experiment.colData["Barcode"]
    # Row 0 holds the x values, row 1 the y values.
    umap = numpy.array(raw_umap).reshape(2, len(cell_barcodes))
    x_lookup = dict(zip(cell_barcodes, umap[0]))
    y_lookup = dict(zip(cell_barcodes, umap[1]))

    pairs = list(assignments.items())
    hue = ["Cluster {}".format(cluster) for _, cluster in pairs]
    xs = [x_lookup[barcode] for barcode, _ in pairs]
    ys = [y_lookup[barcode] for barcode, _ in pairs]

    f, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=xs, y=ys, hue=hue, alpha=0.85)
    ax.set_title("UMAP - Clusters - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("{}/umap_by_cluster.png".format(prefix))
def marker_analysis(sce, tenx, rho, cell_assign_fit, figure):
    """Draw dot and stacked-violin plots of marker genes grouped by cell type.

    Args:
        sce: Path handed to ``SingleCellExperiment.fromRData``.
        tenx: Object providing ``gene_map(sce)`` and
            ``create_scanpy_adata(sce, fast_load=False)``.
        rho: Mapping whose values are iterables of marker genes.
        cell_assign_fit: Path to a pickle holding ``"Barcode"`` and
            ``"cell_type"`` sequences.
        figure: Unused.

    Returns:
        ``["dot_plot.png", "stacked_violinvin_stacked.png"]``.
    """
    sce = SingleCellExperiment.fromRData(sce)
    # NOTE: pickle.load can execute arbitrary code; only pass fit files that
    # this pipeline produced itself.
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)

    unique_markers = set()
    for markers in rho.values():
        unique_markers.update(markers)

    # Translate markers through the gene map, keeping those without an entry.
    convert = tenx.gene_map(sce)
    marker_genes = []
    for gene in unique_markers:
        try:
            marker_genes.append(convert[gene])
        except KeyError:
            marker_genes.append(gene)
            print('No conversion for ', gene)
    print(marker_genes)

    adata = tenx.create_scanpy_adata(sce, fast_load=False)
    print(len(adata.obs.index))
    print(len(fit["cell_type"]))

    # Cells without an assignment in the fit are labelled "Other".
    assigned = dict(zip(fit["Barcode"], fit["cell_type"]))
    cell_types = [assigned.get(barcode, "Other") for barcode in adata.obs.index]
    adata.obs["Cell Type"] = cell_types
    print(len(cell_types))

    # Only markers that are present as variables in adata can be plotted.
    marker_genes = list(set(marker_genes).intersection(set(adata.var.index)))
    print(len(marker_genes))
    print(marker_genes)

    sc.pl.dotplot(adata, marker_genes, groupby='Cell Type', save="matrix.png")
    sc.pl.stacked_violin(adata,
                         marker_genes,
                         groupby='Cell Type',
                         rotation=90,
                         save="vin_stacked.png")
    # NOTE(review): the dot plot is saved with the suffix "matrix.png" but
    # "dot_plot.png" is returned -- confirm the file name scanpy actually
    # writes matches what callers look for.
    return ["dot_plot.png", "stacked_violinvin_stacked.png"]
def create_workflow():
    """Assemble the pipeline workflow and register its plots for the report.

    Reads its settings from ``args`` and ``config``, neither of which is
    defined in this function (presumably module-level objects -- confirm).
    The bare triple-quoted strings below are section markers with no runtime
    effect.

    Returns:
        The workflow obtained from ``secondary_analysis.get_workflow()``.
    """
    workflow = pypeliner.workflow.Workflow()
    bcl_directory = args.get("bcl", None)
    fastq_directories = args.get("fastqs")
    aggregate = args.get("aggregate_mlibs", list())
    agg_type = args.get("agg_method", "scanorama")
    libbase = args.get("lib_base", None)
    # NOTE(review): `additional` and `recipe` are read but never used below.
    additional = args.get("additional", [])
    prefix = config.prefix
    output = config.jobpath
    recipe = args.get("recipe", "basic")
    # Any failure creating the folder (including "already exists") is ignored.
    try:
        cellranger_folder = os.path.join(output, prefix)
        os.makedirs(cellranger_folder)
    except Exception as e:
        pass
    if fastq_directories == None:
        fastq_directories = []
    results = Results(output)
    runner = PrimaryRun(workflow, prefix, output)
    """ Aggregating Libraries """
    if aggregate != None and len(aggregate) > 0:
        if agg_type == "tenx":
            runner.aggregate_libraries_tenx(aggregate, libbase)
            # Redirect the "tenx" argument so the lookups below pick up the
            # aggregated run's output directory.
            args["tenx"] = os.path.join(output, "run_{}/outs".format(prefix))
        if agg_type == "scanorama":
            runner.aggregate_libraries_scanorama()
    """ Setup """
    tenx_analysis = args.get("tenx", None)
    bcls = runner.set_bcl(bcl_directory)
    fastqs = runner.set_fastq(fastq_directories)
    workflow = runner.get_workflow()
    # NOTE(review): same lookup as three lines up; the first one is redundant.
    tenx_analysis = args.get("tenx", None)
    # When fastqs were registered, use <jobpath>/<prefix>/outs instead.
    if fastqs != []:
        tenx_analysis = os.path.join(config.jobpath, prefix, "outs")
    rdata = args.get("rdata", None)
    secondary_analysis = SecondaryAnalysis(workflow, prefix, output)
    tenx = TenxAnalysis(tenx_analysis)
    """ QC """
    secondary_analysis.run_scater()
    secondary_analysis.build_sce(tenx)
    secondary_analysis.set_rdata(rdata)
    results.add_analysis(tenx_analysis)
    results.add_workflow(secondary_analysis.rscript)
    results.add_sce(secondary_analysis.sce)
    # Paths of the QC figures registered with the results below.
    umi = os.path.join(output, "figures/umi_distribution.png")
    mito = os.path.join(output, "figures/mito_distribution.png")
    ribo = os.path.join(output, "figures/ribo_distribution.png")
    freq = os.path.join(output, "figures/highestExprs.png")
    tech = os.path.join(output, "figures/mean_variance_trend.png")
    high_var = os.path.join(output, "figures/highly_variable_genes.png")
    results.add_plot(umi, "UMI Distribution")
    results.add_plot(mito, "Mito Distribution")
    results.add_plot(ribo, "Ribo Distribution")
    results.add_plot(freq, "Highest Frequency")
    results.add_plot(tech, "Mean Variance Trend")
    results.add_plot(high_var, "Highly Variable Genes")
    results.add_cellassign_pkl(secondary_analysis.cell_assign_fit)
    results.add_cellassign_raw(secondary_analysis.cell_assign_rdata)
    """ Differential Expression """
    if config.run_de:
        other_samples = []
        # NOTE(review): `compare` is not defined in this function, and
        # exit(0) terminates the process on the first iteration, so run_de
        # is never reached. This looks like leftover debugging -- confirm.
        for other_sample in compare:
            print("blah")
            exit(0)
            secondary_analysis.run_de(other_sample)
    """ CellAssign """
    if config.run_cellassign:
        tenx = TenxAnalysis(tenx_analysis)
        if hasattr(config, "rho_matrix"):
            # NOTE(review): eval() executes whatever the file contains; only
            # safe for trusted configuration. The file handle is not closed.
            rho_matrix = eval(open(config.rho_matrix, "r").read())
        elif hasattr(config, "tissue"):
            sce = SingleCellExperiment.fromRData(secondary_analysis.sce)
            # NOTE(review): the branch tests for config.tissue but reads
            # config.organ -- confirm which attribute is intended.
            rho_matrix = generate_json(tenx, sce, config.organ)
        else:
            raise AssertionError("Not implemented.")
        # NOTE(review): `combine_assign` is not defined in this function;
        # possibly the unused local `additional` was meant -- confirm.
        secondary_analysis.run_cell_assign(rho_matrix,
                                           tenx_analysis,
                                           additional=combine_assign)
        results.add_cellassign_pkl(secondary_analysis.cell_assign_fit)
        results.add_cellassign_raw(secondary_analysis.cell_assign_rdata)
        path = secondary_analysis.plot_cell_types()
        results.add_plot(path, "Cell Type Frequency")
        path = secondary_analysis.plot_cell_type_by_cluster(tenx_analysis)
        results.add_plot(path, "Cell Type by Cluster")
        path = secondary_analysis.plot_tsne_by_cell_type()
        results.add_plot(path, "TSNE by Cell Type")
        path = secondary_analysis.plot_pca_by_cell_type()
        results.add_plot(path, "PCA by Cell Type")
        # path = secondary_analysis.plot_umap_by_cell_type()
        # results.add_plot(path, "UMAP by Cell Type")
        path1, path2 = secondary_analysis.marker_analysis(tenx, rho_matrix)
        results.add_plot(path1, "Heat Marker Gene Matrix")
        results.add_plot(path2, "Stacked Vin Marker Gene Matrix")
    """ SCVis """
    if config.run_scvis:
        secondary_analysis.run_scviz(config.perplexity, config.components)
    """ CloneAlign """
    if config.run_clonealign and config.copy_number_data is not None and config.clone_assignments is not None:
        secondary_analysis.run_clone_align(tenx, config.copy_number_data,
                                           config.clone_assignments)
    if config.plot_scvis:
        # Embedding path built from the perplexity and component settings;
        # the remaining hyper-parameters are fixed in the file name.
        embedding_file = "{0}_{1}/perplexity_{0}_regularizer_0.001_batch_size_512_learning_rate_0.01_latent_dimension_2_activation_ELU_seed_1_iter_3000.tsv".format(
            config.perplexity, config.components)
        path = secondary_analysis.plot_scvis_by_cluster(tenx_analysis,
                                                        embedding_file,
                                                        pcs=config.components)
        path = os.path.join(output, path)
        results.add_plot(path, "SCVis by Cluster")
        # NOTE(review): config.run_cellassign is used as a truthy flag above
        # but is passed to os.path.exists() here -- confirm the intent.
        if os.path.exists(config.run_cellassign):
            path = secondary_analysis.plot_scvis_by_cell_type(
                embedding_file, pcs=config.components)
            results.add_plot(path, "SCVIS by Cell Type")
    """ Cluster Analysis """
    if config.clustering:
        path = secondary_analysis.plot_pca_by_cluster(tenx_analysis,
                                                      pcs=config.components)
        results.add_plot(path, "PCA by Cluster")
        path = secondary_analysis.plot_tsne_by_cluster(tenx_analysis,
                                                       pcs=config.components)
        results.add_plot(path, "TSNE by Cluster")
        path = secondary_analysis.plot_umap_by_cluster(tenx_analysis,
                                                       pcs=config.components)
        results.add_plot(path, "UMAP by Cluster")
        # For each representation: schedule the marker plots, then register
        # every matching PNG found under figures/expression at this moment,
        # with a title derived from its file name.
        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="PCA",
                                                pcs=config.components)
        pca_cluster_markers = glob.glob("figures/expression/*pca*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)
        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="TSNE",
                                                pcs=config.components)
        pca_cluster_markers = glob.glob("figures/expression/*tsne*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)
        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="UMAP",
                                                pcs=config.components)
        pca_cluster_markers = glob.glob("figures/expression/*umap*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)
        embedding_file = "{0}_{1}/perplexity_{0}_regularizer_0.001_batch_size_512_learning_rate_0.01_latent_dimension_2_activation_ELU_seed_1_iter_3000.tsv".format(
            config.perplexity, config.components)
        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="SCVIS",
                                                pcs=config.components,
                                                embedding_file=embedding_file)
        # NOTE(review): the pattern hard-codes "scvis_5_50" although the
        # embedding path above is built from config.perplexity and
        # config.components -- confirm.
        pca_cluster_markers = glob.glob("figures/expression/*scvis_5_50*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)
    """ Gene Level """
    """ Reporting """
    if config.report:
        workflow.transform(name="{}_markdown".format(prefix),
                           func=exportMD,
                           args=(results, ))
    if config.report:
        workflow.transform(name="{}_finalize".format(prefix),
                           func=exportFinalize,
                           args=(results, ))
    # NOTE(review): the report transforms were added to the workflow obtained
    # from runner.get_workflow(); whether that is the same object returned
    # here cannot be told from this function -- confirm.
    workflow = secondary_analysis.get_workflow()
    return workflow