예제 #1
0
 def test_raw_assay_type_equivelence(self):
     """Check that the "counts" assay has the same type for both loaders.

     One experiment is read from the example RData file, the other is
     built from the result of ``DropletUtils.read10xCounts``.
     """
     rdata = os.path.join(base_dir, "tests/example_sce.RData")
     sce_from_rdata = SingleCellExperiment.fromRData(rdata)
     tenx = DropletUtils()
     # NOTE(review): hard-coded absolute path; only resolves on the
     # original author's machine.
     rs4_results = tenx.read10xCounts("/home/ceglian/data/raw_gene_bc_matrices/hg19/")
     sce_from_rs4 = SingleCellExperiment(rs4_results)
     # Compare the two *different* experiments. The previous assertion
     # compared the RData-derived assay with itself and could never fail.
     self.assertEqual(type(sce_from_rdata.assays["counts"]),
                      type(sce_from_rs4.assays["counts"]))
예제 #2
0
 def test_save_and_load_rdata(self):
     """Round-trip a 10x experiment through ``save`` and ``fromRData``.

     Progress messages and the shape of the reloaded "counts" assay are
     printed; nothing is asserted.
     """
     saved_path = "tests/sce_1.rdata"
     print("Reading")
     raw_counts = DropletUtils().read10xCounts(
         "/home/ceglian/data/raw_gene_bc_matrices/hg19/")
     experiment = SingleCellExperiment.fromRS4(raw_counts)
     print("Writing")
     experiment.save(saved_path)
     print("Loading...")
     reloaded = SingleCellExperiment.fromRData(saved_path)
     print(reloaded.assays["counts"].shape)
예제 #3
0
 def test_symbol_retrieve(self):
     """Print the rowData keys of experiments loaded from four sources."""
     # Source 1: TenX.read10xCounts over a TenxAnalysis of tests/pre_igo.
     analysis = TenxAnalysis("tests/pre_igo")
     from_analysis = TenX.read10xCounts(analysis)
     print(from_analysis.rowData.keys())

     # Source 2: the example experiment stored as .rda.
     sce_path = os.path.join(base_dir, "tests/example_sce.rda")
     from_rdata = SingleCellExperiment.fromRData(sce_path)
     print(from_rdata.rowData.keys())

     # Source 3: matrices read through DropletUtils and wrapped via fromRS4.
     droplet_utils = DropletUtils()
     from_rs4 = SingleCellExperiment.fromRS4(
         droplet_utils.read10xCounts("tests/hg19/"))
     print(from_rs4.rowData.keys())

     # Source 4: the copy-number example; its row/column names are printed too.
     copy_number_path = os.path.join(base_dir,
                                     "tests/example_copy_number.rda")
     copy_number = SingleCellExperiment.fromRData(copy_number_path)
     print(copy_number.rowData.keys())
     print(copy_number.rownames)
     print(copy_number.colnames)
예제 #4
0
def scvis_by_cluster(rdata, tenx, prefix, pcs, embedding_file):
    """Scatter-plot the first two embedding columns, coloured by cluster.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx: Object whose ``clusters(sce, pcs=pcs)`` result can be indexed
            by barcode.
        prefix: Used in the plot title and as the output directory.
        pcs: Forwarded to ``tenx.clusters``.
        embedding_file: Tab-separated text file with one header line. The
            first field of each row is skipped; the remaining fields are
            parsed as floats.

    The figure is saved to ``<prefix>/svis_by_cluster.png``.
    """
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce, pcs=pcs)
    # Close the embedding file deterministically instead of leaking the handle.
    with open(embedding_file, "r") as handle:
        rows = handle.read().splitlines()
    # rows[0] is the header; every other row is "<label>\t<dim0>\t<dim1>...".
    dims = [list(map(float, row.split("\t")[1:])) for row in rows[1:]]
    barcodes = sce.colData["Barcode"]
    x = []
    y = []
    clusters = []
    # Embedding rows are paired with barcodes by position.
    for barcode, dim in zip(barcodes, dims):
        x.append(dim[0])
        y.append(dim[1])
        clusters.append("Cluster {}".format(cluster_labels[barcode]))
    _, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    ax.set_title("SCVIS - Clusters - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    # The previous template "{}}/..." had an unbalanced "}" and made
    # str.format raise ValueError before anything was saved.
    plt.savefig("{}/svis_by_cluster.png".format(prefix))
예제 #5
0
 def test_call_empty_drops(self):
     """Run ``TenX.emptyDrops`` on the example "counts" assay and print the result keys."""
     sce_path = os.path.join(base_dir, "tests/example_sce.RData")
     experiment = SingleCellExperiment.fromRData(sce_path)
     caller = TenX()
     counts = experiment.assays["counts"]
     print(caller.emptyDrops(counts).keys())
예제 #6
0
def pca_by_cluster(rdata, tenx, prefix, pcs):
    """Scatter-plot the first two PCA dimensions, coloured by cluster.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx: Object whose ``clusters(sce, pcs=pcs)`` returns a mapping of
            barcode to cluster label (it is iterated with ``.items()``).
        prefix: Used in the plot title and as the output directory.
        pcs: Forwarded to ``tenx.clusters`` and to ``getReducedDims``.

    The figure is saved to ``<prefix>/pca_by_cluster.png``.
    """
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce, pcs=pcs)
    pca_dims = sce.getReducedDims("PCA", n=pcs)
    barcodes = sce.colData["Barcode"]
    x_coded = dict(zip(barcodes, pca_dims[0]))
    y_coded = dict(zip(barcodes, pca_dims[1]))
    x = []
    y = []
    clusters = []
    for barcode, cluster in cluster_labels.items():
        # Skip clustered barcodes that have no PCA coordinates. Only the
        # missing-key case is tolerated; other errors now surface.
        try:
            x_val = x_coded[barcode]
            y_val = y_coded[barcode]
        except KeyError:
            continue
        x.append(x_val)
        y.append(y_val)
        clusters.append("Cluster {}".format(cluster))
    _, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    ax.set_title("PCA - Clusters - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("{}/pca_by_cluster.png".format(prefix))
예제 #7
0
def tsne_by_cluster(rdata, tenx, prefix, pcs):
    """Plot the TSNE embedding by cluster and export it as JSON.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx: Object whose ``clusters(sce, pcs=pcs)`` returns a mapping of
            barcode to cluster label.
        prefix: Output directory; also used in the plot title.
        pcs: Forwarded to ``tenx.clusters``.

    Writes ``tsne_embedding.json`` (barcode -> (x, y)),
    ``tsne_clusters.json`` (barcode -> cluster) and ``tsne_by_cluster.png``
    under ``prefix``.

    Raises:
        KeyError: if a clustered barcode has no TSNE coordinates.
    """
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce, pcs=pcs)
    tsne_dims = sce.reducedDims["TSNE"]
    barcodes = sce.colData["Barcode"]
    # The stored dims are reshaped into two rows, one per plotted axis.
    tsne_dims = numpy.array(tsne_dims).reshape(2, len(barcodes))
    x_coded = dict(zip(barcodes, tsne_dims[0]))
    y_coded = dict(zip(barcodes, tsne_dims[1]))
    x = []
    y = []
    clusters = []
    embedding = dict()
    labels = dict()
    for barcode, cluster in cluster_labels.items():
        x_val = x_coded[barcode]
        y_val = y_coded[barcode]
        clusters.append("Cluster {}".format(cluster))
        x.append(x_val)
        y.append(y_val)
        embedding[barcode] = (x_val, y_val)
        labels[barcode] = cluster
    # Serialise before opening so a failure does not leave an empty file,
    # and let the context managers close the handles on any error.
    embedding_str = json.dumps(embedding)
    clusters_str = json.dumps(labels)
    with open("{}/tsne_embedding.json".format(prefix), "w") as output:
        output.write(embedding_str)
    with open("{}/tsne_clusters.json".format(prefix), "w") as output:
        output.write(clusters_str)
    _, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    ax.set_title("TSNE - Clusters - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("{}/tsne_by_cluster.png".format(prefix))
예제 #8
0
 def adata(self, scepath, subset=None):
     """Load an experiment from RData and pass it to ``create_scanpy_adata``.

     ``self.rdata`` is used when ``scepath`` is None. The absolute path is
     printed before loading. Returns whatever
     ``self.create_scanpy_adata(sce, subset=subset)`` returns.
     """
     source = self.rdata if scepath is None else scepath
     source = os.path.abspath(source)
     print(source)
     experiment = SingleCellExperiment.fromRData(source)
     return self.create_scanpy_adata(experiment, subset=subset)
예제 #9
0
 def test_clone_align(self):
     """Run ``CloneAlign.run`` on the example experiment (smoke test, no assertions)."""
     example_rda = os.path.join(base_dir, "tests/example_sce.rda")
     # Anchor the fit path at base_dir. It used to be joined onto the .rda
     # *file* path, which produced a location that cannot exist.
     # NOTE(review): this path is still unused below.
     example_clonealign_fit = os.path.join(
         base_dir, "tests/example_clonealign_fit.rda")
     sce = SingleCellExperiment.fromRData(example_rda)
     clonealigner = CloneAlign()
     res = clonealigner.run(sce)
예제 #10
0
def pca_by_cell_type(rdata, cell_assign_fit, prefix):
    """Scatter-plot the first two PCA dimensions, coloured by cell type.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        cell_assign_fit: Path to a pickled mapping with "Barcode" and
            "cell_type" sequences.
        prefix: Used in the plot title only.

    The figure is saved to ``figures/pca_by_celltype.png``.
    """
    sce = SingleCellExperiment.fromRData(rdata)
    # NOTE: pickle can execute arbitrary code; only load trusted fit files.
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)
    pca_dims = sce.getReducedDims("PCA")
    barcodes = sce.colData["Barcode"]
    n_cells = len(barcodes)
    # Only the first len(barcodes) entries of the fit are considered.
    cell_types = dict(
        zip(fit["Barcode"][:n_cells], fit["cell_type"][:n_cells]))
    x_coded = dict(zip(barcodes, pca_dims[0]))
    y_coded = dict(zip(barcodes, pca_dims[1]))
    x = []
    y = []
    clusters = []
    for barcode, cell_type in cell_types.items():
        # Skip fitted barcodes that have no PCA coordinates.
        try:
            x_val = x_coded[barcode]
            y_val = y_coded[barcode]
        except KeyError:
            continue
        # The barcode comes from cell_types itself, so its type is always
        # known; the former "Other" fallback was unreachable.
        clusters.append(cell_type)
        x.append(x_val)
        y.append(y_val)
    _, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    ax.set_title("PCA - Cell Type - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("figures/pca_by_celltype.png")
예제 #11
0
def umap_by_gene(rdata, gene, prefix, pcs):
    """Scatter-plot the UMAP embedding, coloured by one gene's expression.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        gene: Gene used to index the AnnData columns; also used in the
            title and the output file name.
        prefix: Used in the plot title only.
        pcs: Unused; kept for signature compatibility.

    The figure is saved to ``figures/umap_by_<gene>.png``.
    """
    # NOTE(review): `tenx_analysis` is not a parameter of this function; it
    # must exist as a module-level name or this line raises NameError.
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    sce = SingleCellExperiment.fromRData(rdata)
    umap_dims = sce.reducedDims["UMAP"]
    barcodes = sce.colData["Barcode"]
    transcripts = sce.rowData["Symbol"]
    # Pass the symbols read above; the previous code referenced an
    # undefined name `symbols`.
    adata = tenx.create_scanpy_adata(barcodes=barcodes,
                                     transcripts=transcripts)
    assert len(barcodes) == len(adata[:, gene])
    expression = dict(zip(barcodes, adata[:, gene]))
    # The stored dims are reshaped into two rows, one per plotted axis.
    umap_dims = numpy.array(umap_dims).reshape(2, len(barcodes))
    x_coded = dict(zip(barcodes, umap_dims[0]))
    y_coded = dict(zip(barcodes, umap_dims[1]))
    x = []
    y = []
    levels = []
    for barcode in barcodes:
        levels.append(float(expression[barcode]))
        x.append(x_coded[barcode])
        y.append(y_coded[barcode])
    _, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=levels, alpha=0.85)
    # The title used to read "PCA - Clusters", copied from another plot.
    ax.set_title("UMAP - {} - {}".format(gene, prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("figures/umap_by_{}.png".format(gene))
예제 #12
0
 def run(tenx,
         rdata,
         copy_number_data,
         clone_assignments,
         assay="counts",
         run_cmd=True):
     """Fit clonealign for an experiment and return the fit.

     Args:
         tenx: Object providing ``get_genes``, ``gene_map`` and
             ``create_scanpy_adata`` for the loaded experiment.
         rdata: Path handed to ``SingleCellExperiment.fromRData``.
         copy_number_data: Copy-number input forwarded to clonealign.
         clone_assignments: Forwarded to ``CloneAlign.run_command_line``.
         assay: Name that must be present in ``sce.assayNames``.
         run_cmd: When true, run the command-line script (unless its
             output already exists) and read the result back; otherwise
             call the R package through ``importr`` and save the fit.

     Returns:
         The clonealign fit object (previously nothing was returned).

     Raises:
         AssertionError: if ``assay`` is not present in the experiment.
         ValueError: if the command-line run did not produce its output.
     """
     # Single location for the fit. The old code checked this path but then
     # read "rdata/cell_assign_fit.rdata".
     # NOTE(review): confirm run_clonealign.r writes to this path.
     fit_path = "rdata/clone_align.rdata"
     sce = SingleCellExperiment.fromRData(rdata)
     _genes = tenx.get_genes(sce)
     convert = tenx.gene_map(sce)
     # Translate gene ids where a mapping exists, keep the original otherwise.
     genes = [convert[gene] if gene in convert else gene for gene in _genes]
     adata = tenx.create_scanpy_adata(sce)
     matrix = adata.X.T
     assert assay in sce.assayNames, "Assay not present in SCE."
     if run_cmd:
         print("Calling CMD Clone Align.")
         if not os.path.exists(fit_path):
             CloneAlign.run_command_line(adata, copy_number_data,
                                         clone_assignments, genes)
         if not os.path.exists(fit_path):
             raise ValueError("Rscript 'run_clonealign.r' Failed.")
         cal = r.readRDS(fit_path)
     else:
         CloneAlignInterface = importr("clonealign")
         # The old code used undefined names here (`cnv_data`,
         # `clone_align_fit`, `filename`) and always raised NameError.
         cal = CloneAlignInterface.clonealign(matrix, copy_number_data)
         robjects.r.assign("clone_align_fit", cal)
         robjects.r("saveRDS(clone_align_fit, file='{}')".format(fit_path))
     return cal
예제 #13
0
def plot_by_genes(rdata, tenx_analysis, genes, prefix, rep, pcs):
    """Save one expression-coloured scatter plot per gene.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx_analysis: Argument for the ``TenxAnalysis`` constructor.
        genes: Iterable of genes; each is looked up in the AnnData.
        prefix: Unused; kept for signature compatibility.
        rep: Name of the reduced-dimension representation to plot.
        pcs: Unused; kept for signature compatibility.

    Figures are written to ``figures/expression/expression_<gene>.png``.
    """
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    sce = SingleCellExperiment.fromRData(rdata)
    dims = sce.getReducedDims(rep)
    barcodes = sce.colData["Barcode"]
    transcripts = sce.rowData["Symbol"]
    adata = tenx.create_scanpy_adata(barcodes=barcodes,
                                     transcripts=transcripts)
    x_coded = dict(zip(barcodes, dims[0]))
    y_coded = dict(zip(barcodes, dims[1]))
    if not os.path.exists("figures/expression"):
        os.makedirs("figures/expression")
    # Coordinates are the same for every gene, so compute them once.
    x = [x_coded[barcode] for barcode in barcodes]
    y = [y_coded[barcode] for barcode in barcodes]
    for gene in genes:
        expression = [float(adata[barcode, gene].X) for barcode in barcodes]
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.scatterplot(x=x, y=y, hue=expression, alpha=0.85)
        ax.set_title("{} Counts".format(gene))
        ax.legend()
        plt.tight_layout()
        plt.savefig("figures/expression/expression_{}.png".format(gene))
        # pyplot keeps every figure alive until it is closed; release it so
        # memory does not grow with the number of genes.
        plt.close(fig)
예제 #14
0
 def read10xCountsRaw(tenx_analysis, output):
     """Read the raw matrices of an analysis, save them to ``output`` and return the experiment.

     ``tenx_analysis.load()`` is called first; the matrices location comes
     from ``tenx_analysis.raw_matrices()``.
     """
     tenx_analysis.load()
     raw_counts = DropletUtils().read10xCounts(tenx_analysis.raw_matrices())
     experiment = SingleCellExperiment.fromRS4(raw_counts)
     experiment.save(output)
     return experiment
예제 #15
0
def scvis_by_cell_type(rdata, cell_assign_fit, prefix, embedding_file):
    """Scatter-plot the first two embedding columns, coloured by cell type.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        cell_assign_fit: Path to a pickled mapping with "Barcode" and
            "cell_type" sequences.
        prefix: Used in the plot title and the output file name.
        embedding_file: Tab-separated text file with one header line. The
            first field of each row is skipped; the remaining fields are
            parsed as floats.

    The figure is saved to ``figures/scvis_by_cell_type_<prefix>.png``.
    """
    # NOTE: pickle can execute arbitrary code; only load trusted fit files.
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)
    sce = SingleCellExperiment.fromRData(rdata)
    barcodes = sce.colData["Barcode"]
    n_cells = len(barcodes)
    # Only the first len(barcodes) entries of the fit are considered.
    cell_types = dict(
        zip(fit["Barcode"][:n_cells], fit["cell_type"][:n_cells]))
    # Close the embedding file deterministically instead of leaking the handle.
    with open(embedding_file, "r") as handle:
        rows = handle.read().splitlines()
    # rows[0] is the header line.
    dims = [list(map(float, row.split("\t")[1:])) for row in rows[1:]]
    x = []
    y = []
    clusters = []
    # Embedding rows are paired with barcodes by position.
    for barcode, dim in zip(barcodes, dims):
        # Barcodes without an assigned type are grouped under "Other".
        clusters.append(cell_types.get(barcode, "Other"))
        x.append(dim[0])
        y.append(dim[1])
    _, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    ax.set_title("SCVIS - Cell Type - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("figures/scvis_by_cell_type_{}.png".format(prefix))
예제 #16
0
 def test_assay_names_rdata(self):
     """Every assay in the example RData must be one of the known names."""
     known_assays = ['BatchCellMeans', 'BaseCellMeans', 'BCV', 'CellMeans', 'TrueCounts', 'counts']
     sce_path = os.path.join(base_dir, "tests/example_sce.RData")
     experiment = SingleCellExperiment.fromRData(sce_path)
     # Only membership is checked: extra expected names are allowed to be absent.
     for name in list(experiment.assays.keys()):
         self.assertTrue(name in known_assays)
예제 #17
0
def umap_by_cluster(rdata, cluster_labels, sampleid, directory):
    """Plot the UMAP embedding by cluster into ``<directory>/umap_by_cluster.png``.

    The stored UMAP dims are reshaped to two rows and handed, together with
    the barcodes and ``cluster_labels``, to ``reduced_dims_by_cluster``.
    ``sampleid`` is not used.
    """
    experiment = SingleCellExperiment.fromRData(rdata)
    embedding = experiment.reducedDims["UMAP"]
    cell_barcodes = experiment.colData["Barcode"]
    embedding = numpy.array(embedding).reshape(2, len(cell_barcodes))
    out_path = os.path.join(directory, "umap_by_cluster.png")
    reduced_dims_by_cluster(
        cluster_labels, embedding, cell_barcodes, out_path, "UMAP")
예제 #18
0
def cluster_markers(rdata, tenx_analysis, rep, pcs, embedding_file, prefix):
    """Plot the ranked marker genes of every cluster.

    Markers are computed with ``markers_by_clusters`` on the PCA
    representation; the per-cluster name tuples are then passed one by one
    to ``plot_by_markers`` with a "Cluster <i> <prefix>" label.
    """
    analysis = TenxAnalysis(tenx_analysis)
    analysis.load()
    experiment = SingleCellExperiment.fromRData(rdata)
    ranked = analysis.markers_by_clusters(experiment, rep="PCA", pcs=pcs)
    # Transpose the ranked-names table so each tuple holds one cluster's genes.
    genes_per_cluster = list(zip(*ranked["rank_genes_groups"]["names"]))
    for index, cluster_genes in enumerate(genes_per_cluster):
        label = "Cluster {} {}".format(index, prefix)
        plot_by_markers(rdata, tenx_analysis, cluster_genes, label, rep,
                        pcs, embedding_file)
예제 #19
0
 def test_cell_assign_em(self):
     """Run ``CellAssign.run_em`` on the test experiment with a two-group marker matrix."""
     sce_path = os.path.join(base_dir, "tests/cell_assign_test.RData")
     experiment = SingleCellExperiment.fromRData(sce_path)
     assigner = CellAssign()
     marker_genes = [
         "Gene161", "Gene447", "Gene519", "Gene609", "Gene677", "Gene750",
         "Gene754", "Gene860", "Gene929", "Gene979"
     ]
     marker_matrix = GeneMarkerMatrix(genes=marker_genes,
                                      cells=["Groups1", "Groups2"])
     # Smoke test: the result is not inspected.
     assigner.run_em(experiment, marker_matrix)
예제 #20
0
def scvis_by_cluster_markers(rdata, tenx_analysis, prefix, pcs,
                             embedding_file):
    """Best-effort marker computation on the SCVIS representation.

    Loads the analysis and the experiment, then calls
    ``markers_by_clusters(sce, rep="SCVIS", pcs=pcs,
    embedding_file=embedding_file)``. The result is not used and the
    function always returns None. ``prefix`` is not used.

    Any failure is logged with its traceback instead of being dropped
    silently; the exception is still not propagated.
    """
    import logging

    try:
        tenx = TenxAnalysis(tenx_analysis)
        tenx.load()
        sce = SingleCellExperiment.fromRData(rdata)
        tenx.markers_by_clusters(
            sce, rep="SCVIS", pcs=pcs, embedding_file=embedding_file)
    except Exception:
        # Stay best-effort, but leave a trace so failures can be diagnosed.
        logging.getLogger(__name__).exception(
            "scvis_by_cluster_markers failed for %s", rdata)
        return
예제 #21
0
 def test_expression_normalization(self):
     """CPM, TPM and FPKM results must keep the shape of the "counts" assay."""
     sce_path = os.path.join(base_dir, "tests/example_sce.RData")
     experiment = SingleCellExperiment.fromRData(sce_path)
     normalizer = TenX()
     counts = experiment.assays["counts"]
     normalized = (
         normalizer.calculateCPM(counts),
         normalizer.calculateTPM(counts),
         normalizer.calculateFPKM(counts),
     )
     for matrix in normalized:
         assert matrix.shape == counts.shape
예제 #22
0
def exportRData(rdata, directory, delim="\t"):
    """Dump an RData experiment into ``directory`` as text files.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        directory: Output directory; created when missing.
        delim: Field separator passed to ``to_csv`` for each assay.

    Writes ``meta.txt`` (string forms of ``sizeFactors`` and
    ``reducedDims``) and one ``<assay>.csv`` per name in ``sce.assayNames``.
    Each assay file name is printed as it is written.
    """
    if not os.path.exists(directory):
        os.makedirs(directory)
    sce = SingleCellExperiment.fromRData(rdata)
    # The handle used to be left open, so the metadata could stay unflushed;
    # the context manager guarantees flush and close.
    with open(os.path.join(directory, "meta.txt"), "w") as output:
        output.write("sizeFactors: " + str(sce.sizeFactors) + "\n")
        output.write("reducedDims: " + str(sce.reducedDims) + "\n")
    for assay in sce.assayNames:
        filename = os.path.join(directory, "{}.csv".format(assay))
        print(filename)
        dataframe = sce.assay(assay)
        dataframe.to_csv(filename, sep=delim)
예제 #23
0
def tsne_by_cell_type(rdata, fit, sampleid, directory, known_types=None):
    """Plot the TSNE embedding by cell type into ``<directory>/tsne_by_cell_type.png``.

    ``fit`` must provide "Barcode" and "cell_type" sequences; they are
    zipped into a barcode -> cell type mapping. Drawing is delegated to
    ``reduced_dims_by_cell_type``. ``sampleid`` is not used.
    """
    experiment = SingleCellExperiment.fromRData(rdata)
    embedding = experiment.reducedDims["TSNE"]
    cell_barcodes = experiment.colData["Barcode"]
    type_by_barcode = dict(zip(fit["Barcode"], fit["cell_type"]))
    # The stored dims are reshaped into two rows before plotting.
    embedding = numpy.array(embedding).reshape(2, len(cell_barcodes))
    out_path = os.path.join(directory, "tsne_by_cell_type.png")
    reduced_dims_by_cell_type(
        type_by_barcode,
        embedding,
        cell_barcodes,
        out_path,
        "TSNE",
        known_types=known_types,
    )
예제 #24
0
 def test_clone_align(self):
     """Run CloneAlign with copy-number columns A, B and C and expect 200 clone calls."""
     sce_path = os.path.join(base_dir, "tests/example_sce.rda")
     experiment = SingleCellExperiment.fromRData(sce_path)
     row_data = experiment.rowData
     # Stack the three rowData columns, then transpose so they become columns.
     stacked = numpy.array([row_data[name] for name in ["A", "B", "C"]])
     copy_number = numpy.transpose(stacked)
     print(copy_number.shape)
     aligner = CloneAlign()
     fit = aligner.run(experiment, copy_number)
     assert len(fit["clone"]) == 200
예제 #25
0
def IntegratedSummary(sce, sampleid, report):
    """Build (or reuse) the JSON summary for a sample and copy it to ``report``.

    Args:
        sce: Path handed to ``SingleCellExperiment.fromRData``.
        sampleid: Sample identifier; names the cached ``viz/<sampleid>.json``.
        report: Destination path the summary is copied to.

    The summary is only rebuilt when ``viz/<sampleid>.json`` does not exist.
    It holds cell data, gene data, non-zero log counts, cell types, UMAP
    coordinates and the marker list read from ``config.rho_matrix``.
    """
    summary_path = "viz/{}.json".format(sampleid)
    if not os.path.exists("viz/"):
        os.makedirs("viz")
    if not os.path.exists("viz/html/"):
        os.makedirs("viz/html/")
    if not os.path.exists(summary_path):
        sce = SingleCellExperiment.fromRData(sce)
        column_data = dump_all_coldata(sce)
        patient_data = collections.defaultdict(dict)
        patient_data[sampleid]["celldata"] = column_data
        gene_data = dump_all_rowdata(sce)
        patient_data[sampleid]["genedata"] = gene_data
        logcounts = sce.assays["logcounts"].todense().tolist()
        # Sparse barcode -> symbol -> value mapping; zero entries are omitted.
        log_count_matrix = collections.defaultdict(dict)
        for symbol, row in zip(gene_data["Symbol"], logcounts):
            for barcode, cell in zip(column_data["Barcode"], row):
                if float(cell) != 0.0:
                    log_count_matrix[barcode][symbol] = cell
        patient_data[sampleid]["log_count_matrix"] = dict(log_count_matrix)
        rdims = sce.reducedDims["UMAP"]
        barcodes = sce.colData["Barcode"]
        rdims = numpy.array(rdims).reshape(2, len(barcodes))
        # Turn the dotted cell type names into display labels.
        celltypes = [
            "Monocyte/Macrophage" if celltype == "Monocyte.Macrophage"
            else celltype.replace(".", " ")
            for celltype in sce.colData["cell_type"]
        ]
        fit = dict(zip(barcodes, celltypes))
        x_coded = dict(zip(barcodes, rdims[0]))
        y_coded = dict(zip(barcodes, rdims[1]))
        coords = dict()
        for barcode in fit:
            # NOTE(review): coordinates are truncated to integers here --
            # confirm the viewer does not need float precision.
            try:
                x_val = int(x_coded[barcode])
                y_val = int(y_coded[barcode])
            except (KeyError, TypeError, ValueError, OverflowError):
                # Missing or non-finite coordinates: leave the cell out.
                continue
            coords[barcode] = (x_val, y_val)
        patient_data[sampleid]["cellassign"] = fit
        patient_data[sampleid]["umap"] = coords
        patient_data["rho"] = GeneMarkerMatrix.read_yaml(
            config.rho_matrix).marker_list
        patient_data_str = json.dumps(patient_data)
        # Close the handle even if the write fails.
        with open(summary_path, "w") as output:
            output.write(patient_data_str)
    shutil.copyfile(summary_path, report)
예제 #26
0
def cell_type_by_cluster(rdata, cell_assign_fit, tenx_analysis, prefix):
    """Bar-plot the cell type composition of every cluster.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        cell_assign_fit: Path to a pickled mapping with "Barcode" and
            "cell_type" sequences.
        tenx_analysis: Argument for the ``TenxAnalysis`` constructor.
        prefix: Used in the plot title only.

    For each cluster, the fraction of its cells assigned to each cell type
    is plotted. The figure is saved to ``figures/cell_type_by_cluster.png``.
    """
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    # NOTE: pickle can execute arbitrary code; only load trusted fit files.
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)
    cell_types = dict(zip(fit["Barcode"], fit["cell_type"]))
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce)
    # NOTE(review): assumes cluster_labels iterates as one label per barcode,
    # in barcode order. If it is a barcode-keyed mapping, this zip pairs
    # barcodes with its keys -- confirm against TenxAnalysis.clusters.
    clusters = dict(zip(sce.colData["Barcode"], cluster_labels))
    cell_clusters = []
    cell_labels = []
    for barcode, cell in cell_types.items():
        # Cells without a cluster assignment are left out.
        try:
            label = str(clusters[barcode])
        except KeyError:
            continue
        cell_clusters.append(label)
        cell_labels.append(cell)
    f, ax = plt.subplots(figsize=(16, 8))
    counts = collections.defaultdict(lambda: collections.defaultdict(int))
    for label, cell in zip(cell_clusters, cell_labels):
        counts[label][cell] += 1
    # One row per (cluster, cell type). The old loop walked the per-cell
    # list and emitted a duplicate row for every cell.
    unique_types = list(collections.OrderedDict.fromkeys(cell_labels))
    fclusters = []
    fcelltypes = []
    fpercentages = []
    for label, type_counts in counts.items():
        total = float(sum(type_counts.values()))
        for cell in unique_types:
            fcelltypes.append(cell)
            fclusters.append(label)
            # .get does not insert missing keys into the defaultdict.
            fpercentages.append(float(type_counts.get(cell, 0)) / total)
    df = pandas.DataFrame({
        "Cluster": fclusters,
        "Cell Type": fcelltypes,
        "Percentage": fpercentages
    })
    ax = sns.barplot(x="Cluster",
                     y="Percentage",
                     hue="Cell Type",
                     data=df,
                     palette="tab10")
    ax.set_title("Cell Type by Cluster - {}".format(prefix))
    plt.tight_layout()
    plt.savefig("figures/cell_type_by_cluster.png")
예제 #27
0
 def create_input_files(rdata, components, output):
     """Write the leading PCA chunk to ``<output>/matrix.tsv`` and return that path.

     The PCA dims are cut into slices of ``components`` items; only the
     first slice is saved. The header row is PC_0 ... PC_<components-1>,
     tab-separated. The shape of the saved array is printed.
     """
     experiment = SingleCellExperiment.fromRData(rdata)
     embedding = experiment.getReducedDims("PCA", n=components)
     chunks = [
         embedding[start:start + components]
         for start in range(0, len(embedding), components)
     ]
     # NOTE(review): every chunk after the first is discarded -- confirm intent.
     counts = numpy.array(chunks[0])
     print(counts.shape)
     header = "\t".join("PC_{}".format(index) for index in range(components))
     filename = os.path.join(output, "matrix.tsv")
     numpy.savetxt(filename, counts, delimiter="\t", header=header)
     return filename
예제 #28
0
 def qcd_sce(self):
     """Load and return the experiment stored at ``self.qcdrdata``."""
     # The QualityControl build/filter and Rscript steps that once ran
     # before this load are disabled, so the file must already exist.
     qcd_path = self.qcdrdata
     return SingleCellExperiment.fromRData(qcd_path)
예제 #29
0
def umap_by_cluster(rdata, tenx, prefix, pcs):
    """Scatter-plot the UMAP embedding, coloured by cluster.

    Cluster labels come from ``tenx.clusters(sce, pcs=pcs)`` and are looked
    up by barcode. The figure is saved to ``<prefix>/umap_by_cluster.png``.
    """
    experiment = SingleCellExperiment.fromRData(rdata)
    labels_by_barcode = tenx.clusters(experiment, pcs=pcs)
    umap_dims = experiment.reducedDims["UMAP"]
    cell_barcodes = experiment.colData["Barcode"]
    # The stored dims are reshaped into two rows, one per plotted axis.
    umap_dims = numpy.array(umap_dims).reshape(2, len(cell_barcodes))
    first_axis = dict(zip(cell_barcodes, umap_dims[0]))
    second_axis = dict(zip(cell_barcodes, umap_dims[1]))
    xs, ys, hues = [], [], []
    for cell_barcode, label in labels_by_barcode.items():
        hues.append("Cluster {}".format(label))
        xs.append(first_axis[cell_barcode])
        ys.append(second_axis[cell_barcode])
    figure, axes = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=xs, y=ys, hue=hues, alpha=0.85)
    axes.set_title("UMAP - Clusters - {}".format(prefix))
    axes.legend()
    plt.tight_layout()
    plt.savefig("{}/umap_by_cluster.png".format(prefix))
예제 #30
0
def marker_analysis(sce, tenx, rho, cell_assign_fit, figure):
    """Draw dot and stacked-violin plots of marker genes grouped by cell type.

    Args:
        sce: Path handed to ``SingleCellExperiment.fromRData``.
        tenx: Object providing ``gene_map`` and ``create_scanpy_adata``.
        rho: Mapping whose values are lists of marker genes.
        cell_assign_fit: Path to a pickled mapping with "Barcode" and
            "cell_type" sequences.
        figure: Unused; kept for signature compatibility.

    Returns:
        A two-element list of figure file names.
    """
    sce = SingleCellExperiment.fromRData(sce)
    # NOTE: pickle can execute arbitrary code; only load trusted fit files.
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)
    gene_markers = []
    for markers in rho.values():
        gene_markers += markers
    _marker_genes = list(set(gene_markers))
    convert = tenx.gene_map(sce)
    marker_genes = []
    for gene in _marker_genes:
        # Fall back to the original name when no conversion is known.
        try:
            marker_genes.append(convert[gene])
        except KeyError:
            marker_genes.append(gene)
            print('No conversion for ', gene)
    print(marker_genes)
    adata = tenx.create_scanpy_adata(sce, fast_load=False)
    print(len(adata.obs.index))
    print(len(fit["cell_type"]))
    _cell_types = dict(zip(fit["Barcode"], fit["cell_type"]))
    # Cells missing from the fit are grouped under "Other".
    cell_types = [
        _cell_types.get(barcode, "Other") for barcode in adata.obs.index
    ]
    adata.obs["Cell Type"] = cell_types
    print(len(cell_types))
    # Only genes present in the AnnData can be plotted.
    marker_genes = list(set(marker_genes).intersection(set(adata.var.index)))
    print(len(marker_genes))
    print(marker_genes)
    sc.pl.dotplot(adata, marker_genes, groupby='Cell Type', save="matrix.png")
    sc.pl.stacked_violin(adata,
                         marker_genes,
                         groupby='Cell Type',
                         rotation=90,
                         save="vin_stacked.png")
    # NOTE(review): "dot_plot.png" may not match the file scanpy writes for
    # save="matrix.png" -- verify against the installed scanpy version.
    return ["dot_plot.png", "stacked_violinvin_stacked.png"]