Exemple #1
0
 def test_symbol_retrieve(self):
     """Load experiments from four different sources and print their row data.

     Each source is loaded in turn and the keys of its ``rowData`` are
     printed; the row and column names of the last experiment are printed
     as well. Nothing is asserted.
     """
     # 1) Counts read from a 10x analysis directory.
     analysis = TenxAnalysis("tests/pre_igo")
     experiment = TenX.read10xCounts(analysis)
     print(experiment.rowData.keys())

     # 2) A serialized example experiment.
     sce_path = os.path.join(base_dir, "tests/example_sce.rda")
     experiment = SingleCellExperiment.fromRData(sce_path)
     print(experiment.rowData.keys())

     # 3) An RS4 result produced by DropletUtils.
     droplet_utils = DropletUtils()
     rs4 = droplet_utils.read10xCounts("tests/hg19/")
     experiment = SingleCellExperiment.fromRS4(rs4)
     print(experiment.rowData.keys())

     # 4) The copy-number example; also dump its row/column names.
     copy_number_path = os.path.join(base_dir,
                                     "tests/example_copy_number.rda")
     experiment = SingleCellExperiment.fromRData(copy_number_path)
     print(experiment.rowData.keys())
     print(experiment.rownames)
     print(experiment.colnames)
Exemple #2
0
def scvis_by_cluster(rdata, tenx, prefix, pcs, embedding_file):
    """Scatter-plot an SCVIS embedding coloured by cluster label.

    Args:
        rdata: Path passed to ``SingleCellExperiment.fromRData``.
        tenx: Object whose ``clusters(sce, pcs=...)`` result is indexed by
            barcode.
        prefix: Output directory; also used in the plot title.
        pcs: Forwarded to ``tenx.clusters``.
        embedding_file: Tab-separated embedding. The first line (header) and
            the first column of every row are dropped; the remaining fields
            are parsed as floats.

    The figure is written to ``<prefix>/svis_by_cluster.png``.
    """
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce, pcs=pcs)
    # Context manager so the handle is closed even if reading fails.
    with open(embedding_file, "r") as handle:
        rows = handle.read().splitlines()
    dims = [list(map(float, row.split("\t")[1:])) for row in rows[1:]]
    barcodes = sce.colData["Barcode"]
    print(dims)
    x = []
    y = []
    clusters = []
    # Rows are paired with barcodes by position.
    # NOTE(review): assumes the embedding rows follow barcode order - confirm.
    for barcode, dim in zip(barcodes, dims):
        x.append(dim[0])
        y.append(dim[1])
        clusters.append("Cluster {}".format(cluster_labels[barcode]))
    f, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    ax.set_title("SCVIS - Clusters - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    # The previous template "{}}/..." had an unbalanced brace and raised
    # ValueError before anything was saved.
    plt.savefig("{}/svis_by_cluster.png".format(prefix))
Exemple #3
0
 def test_call_empty_drops(self):
     """Run ``emptyDrops`` on the example counts assay and print its keys."""
     sce_path = os.path.join(base_dir, "tests/example_sce.RData")
     experiment = SingleCellExperiment.fromRData(sce_path)
     runner = TenX()
     counts = experiment.assays["counts"]
     print(runner.emptyDrops(counts).keys())
Exemple #4
0
def pca_by_cluster(rdata, tenx, prefix, pcs):
    """Scatter-plot the first two PCA dimensions coloured by cluster.

    Args:
        rdata: Path passed to ``SingleCellExperiment.fromRData``.
        tenx: Object whose ``clusters(sce, pcs=...)`` returns a mapping of
            barcode to cluster.
        prefix: Output directory; also used in the plot title.
        pcs: Forwarded to ``tenx.clusters`` and ``sce.getReducedDims``.

    Barcodes that have a cluster label but no PCA coordinate are skipped.
    The figure is written to ``<prefix>/pca_by_cluster.png``.
    """
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce, pcs=pcs)
    pca_dims = sce.getReducedDims("PCA", n=pcs)
    barcodes = sce.colData["Barcode"]
    x_coded = dict(zip(barcodes, pca_dims[0]))
    y_coded = dict(zip(barcodes, pca_dims[1]))
    x = []
    y = []
    clusters = []
    for barcode, cluster in cluster_labels.items():
        try:
            x_val = x_coded[barcode]
            y_val = y_coded[barcode]
        except KeyError:
            # Only a missing barcode is expected here; anything else should
            # surface instead of being silently dropped.
            continue
        x.append(x_val)
        y.append(y_val)
        clusters.append("Cluster {}".format(cluster))
    f, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    ax.set_title("PCA - Clusters - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("{}/pca_by_cluster.png".format(prefix))
Exemple #5
0
def scvis_by_cell_type(rdata, cell_assign_fit, prefix, embedding_file):
    """Scatter-plot an SCVIS embedding coloured by assigned cell type.

    Args:
        rdata: Path passed to ``SingleCellExperiment.fromRData``.
        cell_assign_fit: Path to a pickle providing ``"Barcode"`` and
            ``"cell_type"`` sequences.
        prefix: Used in the plot title and the output file name.
        embedding_file: Tab-separated embedding. The first line (header) and
            the first column of every row are dropped; the remaining fields
            are parsed as floats.

    Barcodes without an assigned cell type are labelled ``"Other"``. The
    figure is written to ``figures/scvis_by_cell_type_<prefix>.png``.
    """
    # NOTE: pickle executes arbitrary code on load; only use trusted fits.
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)
    sce = SingleCellExperiment.fromRData(rdata)
    barcodes = sce.colData["Barcode"]
    limit = len(barcodes)
    cell_types = dict(zip(fit["Barcode"][:limit], fit["cell_type"][:limit]))
    with open(embedding_file, "r") as handle:
        rows = handle.read().splitlines()
    dims = [list(map(float, row.split("\t")[1:])) for row in rows[1:]]
    x = []
    y = []
    clusters = []
    # Rows are paired with barcodes by position.
    # NOTE(review): assumes the embedding rows follow barcode order - confirm.
    for barcode, dim in zip(barcodes, dims):
        clusters.append(cell_types.get(barcode, "Other"))
        x.append(dim[0])
        y.append(dim[1])
    f, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    ax.set_title("SCVIS - Cell Type - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("figures/scvis_by_cell_type_{}.png".format(prefix))
Exemple #6
0
 def test_clone_align(self):
     """Run CloneAlign on the example experiment (smoke test, no asserts)."""
     example_rda = os.path.join(base_dir, "tests/example_sce.rda")
     # Built from base_dir; it was previously joined onto the .rda file path,
     # which yields a path nested under a file. Currently unused below.
     example_clonealign_fit = os.path.join(
         base_dir, "tests/example_clonealign_fit.rda")
     sce = SingleCellExperiment.fromRData(example_rda)
     clonealigner = CloneAlign()
     res = clonealigner.run(sce)
Exemple #7
0
def tsne_by_cluster(rdata, tenx, prefix, pcs):
    """Plot the TSNE embedding by cluster and dump it as JSON.

    Args:
        rdata: Path passed to ``SingleCellExperiment.fromRData``.
        tenx: Object whose ``clusters(sce, pcs=...)`` returns a mapping of
            barcode to cluster.
        prefix: Output directory; also used in the plot title.
        pcs: Forwarded to ``tenx.clusters``.

    Writes ``tsne_embedding.json`` (barcode -> (x, y)),
    ``tsne_clusters.json`` (barcode -> cluster) and ``tsne_by_cluster.png``
    under ``prefix``.

    Raises:
        KeyError: If a clustered barcode has no TSNE coordinate.
    """
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce, pcs=pcs)
    tsne_dims = sce.reducedDims["TSNE"]
    barcodes = sce.colData["Barcode"]
    # The stored embedding is reshaped to two rows: x values, then y values.
    tsne_dims = numpy.array(tsne_dims).reshape(2, len(barcodes))
    x_coded = dict(zip(barcodes, tsne_dims[0]))
    y_coded = dict(zip(barcodes, tsne_dims[1]))
    x = []
    y = []
    clusters = []
    embedding = dict()
    labels = dict()
    for barcode, cluster in cluster_labels.items():
        point = (x_coded[barcode], y_coded[barcode])
        clusters.append("Cluster {}".format(cluster))
        x.append(point[0])
        y.append(point[1])
        embedding[barcode] = point
        labels[barcode] = cluster
    # Serialize before opening so a failure does not leave an empty file, and
    # use context managers so the handles are always closed.
    embedding_str = json.dumps(embedding)
    with open("{}/tsne_embedding.json".format(prefix), "w") as output:
        output.write(embedding_str)
    with open("{}/tsne_clusters.json".format(prefix), "w") as output:
        output.write(json.dumps(labels))
    f, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    ax.set_title("TSNE - Clusters - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("{}/tsne_by_cluster.png".format(prefix))
Exemple #8
0
 def run(tenx,
         rdata,
         copy_number_data,
         clone_assignments,
         assay="counts",
         run_cmd=True):
     """Run CloneAlign for the experiment stored in ``rdata``.

     Args:
         tenx: Object providing ``get_genes``, ``gene_map`` and
             ``create_scanpy_adata`` for the loaded experiment.
         rdata: Path passed to ``SingleCellExperiment.fromRData``.
         copy_number_data: Forwarded to ``CloneAlign.run_command_line``.
         clone_assignments: Forwarded to ``CloneAlign.run_command_line``.
         assay: Assay name that must be present in ``sce.assayNames``.
         run_cmd: When true, use the command-line path; otherwise call the
             ``clonealign`` R package through ``importr``.

     Raises:
         AssertionError: If ``assay`` is not in ``sce.assayNames``.
         ValueError: If the command-line run did not produce
             ``rdata/clone_align.rdata``.

     NOTE(review): nothing is returned; ``cal`` is assigned but unused.
     """
     sce = SingleCellExperiment.fromRData(rdata)
     _genes = tenx.get_genes(sce)
     convert = tenx.gene_map(sce)
     # Translate gene names through ``convert`` where a mapping exists.
     genes = []
     for gene in _genes:
         if gene in convert:
             genes.append(convert[gene])
         else:
             genes.append(gene)
     adata = tenx.create_scanpy_adata(sce)
     # Transposed expression matrix; only used by the non-command-line branch.
     matrix = adata.X.T
     assert assay in sce.assayNames, "Assay not present in SCE."
     if run_cmd:
         print("Calling CMD Clone Align.")
         # Skip the run when a previous result file already exists.
         if not os.path.exists("rdata/clone_align.rdata"):
             CloneAlign.run_command_line(adata, copy_number_data,
                                         clone_assignments, genes)
         if not os.path.exists("rdata/clone_align.rdata"):
             raise ValueError("Rscript 'run_clonealign.r' Failed.")
         # NOTE(review): the checks above look for clone_align.rdata but this
         # reads cell_assign_fit.rdata - confirm which file is intended.
         cal = r.readRDS("rdata/cell_assign_fit.rdata")
     else:
         CloneAlignInterface = importr("clonealign")
         # NOTE(review): ``cnv_data``, ``clone_align_fit`` and ``filename``
         # are not defined in this function; unless they exist at module
         # level this branch raises NameError.
         cal = CloneAlignInterface.clonealign(matrix, cnv_data)
         robjects.r.assign("clone_align_fit", clone_align_fit)
         robjects.r("saveRDS(clone_align_fit, file='{}')".format(filename))
Exemple #9
0
def pca_by_cell_type(rdata, cell_assign_fit, prefix):
    """Scatter-plot the first two PCA dimensions coloured by cell type.

    Args:
        rdata: Path passed to ``SingleCellExperiment.fromRData``.
        cell_assign_fit: Path to a pickle providing ``"Barcode"`` and
            ``"cell_type"`` sequences.
        prefix: Used in the plot title.

    Barcodes from the fit that have no PCA coordinate are skipped. The
    figure is written to ``figures/pca_by_celltype.png``.
    """
    sce = SingleCellExperiment.fromRData(rdata)
    # NOTE: pickle executes arbitrary code on load; only use trusted fits.
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)
    pca_dims = sce.getReducedDims("PCA")
    barcodes = sce.colData["Barcode"]
    limit = len(barcodes)
    cell_types = dict(zip(fit["Barcode"][:limit], fit["cell_type"][:limit]))
    x_coded = dict(zip(barcodes, pca_dims[0]))
    y_coded = dict(zip(barcodes, pca_dims[1]))
    x = []
    y = []
    clusters = []
    for barcode, cell_type in cell_types.items():
        try:
            x_val = x_coded[barcode]
            y_val = y_coded[barcode]
        except KeyError:
            continue
        # The label comes straight from the mapping being iterated, so no
        # fallback lookup is needed.
        clusters.append(cell_type)
        x.append(x_val)
        y.append(y_val)
    f, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    ax.set_title("PCA - Cell Type - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("figures/pca_by_celltype.png")
Exemple #10
0
def umap_by_gene(rdata, gene, prefix, pcs):
    """Scatter-plot the UMAP embedding coloured by expression of ``gene``.

    Args:
        rdata: Path passed to ``SingleCellExperiment.fromRData``.
        gene: Gene used to index the AnnData columns; also used in the
            output file name.
        prefix: Used in the plot title.
        pcs: Unused; kept for interface compatibility.

    The figure is written to ``figures/umap_by_<gene>.png``.

    Raises:
        AssertionError: If the expression vector and barcode list differ in
            length.
    """
    # NOTE(review): ``tenx_analysis`` is not a parameter of this function; it
    # must exist at module level or this raises NameError - confirm.
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    sce = SingleCellExperiment.fromRData(rdata)
    umap_dims = sce.reducedDims["UMAP"]
    barcodes = sce.colData["Barcode"]
    transcripts = sce.rowData["Symbol"]
    # Pass the symbols read above (the undefined name ``symbols`` was used
    # here before).
    adata = tenx.create_scanpy_adata(barcodes=barcodes,
                                     transcripts=transcripts)
    assert len(barcodes) == len(adata[:, gene])
    expression = dict(zip(barcodes, adata[:, gene]))
    # The stored embedding is reshaped to two rows: x values, then y values.
    umap_dims = numpy.array(umap_dims).reshape(2, len(barcodes))
    x_coded = dict(zip(barcodes, umap_dims[0]))
    y_coded = dict(zip(barcodes, umap_dims[1]))
    x = []
    y = []
    levels = []
    for barcode in barcodes:
        levels.append(float(expression[barcode]))
        x.append(x_coded[barcode])
        y.append(y_coded[barcode])
    f, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=levels, alpha=0.85)
    # The title previously read "PCA - Clusters", copied from another plot.
    ax.set_title("UMAP - {} - {}".format(gene, prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("figures/umap_by_{}.png".format(gene))
Exemple #11
0
 def test_raw_assay_type_equivelence(self):
     """The counts assay has the same type whether loaded from RData or RS4."""
     rdata = os.path.join(base_dir, "tests/example_sce.RData")
     sce_from_rdata = SingleCellExperiment.fromRData(rdata)
     tenx = DropletUtils()
     rs4_results = tenx.read10xCounts("/home/ceglian/data/raw_gene_bc_matrices/hg19/")
     # Built with fromRS4, as the other tests do for read10xCounts output.
     sce_from_rs4 = SingleCellExperiment.fromRS4(rs4_results)
     # Compare the two sources; the RData assay used to be compared with
     # itself, so the assertion could never fail.
     self.assertEqual(type(sce_from_rdata.assays["counts"]),
                      type(sce_from_rs4.assays["counts"]))
Exemple #12
0
 def adata(self, scepath, subset=None):
     """Build a scanpy AnnData object from a serialized experiment.

     Falls back to ``self.rdata`` when ``scepath`` is ``None``. The resolved
     absolute path is printed before loading. ``subset`` is forwarded to
     ``self.create_scanpy_adata``.
     """
     source = self.rdata if scepath is None else scepath
     resolved = os.path.abspath(source)
     print(resolved)
     experiment = SingleCellExperiment.fromRData(resolved)
     return self.create_scanpy_adata(experiment, subset=subset)
Exemple #13
0
def plot_by_genes(rdata, tenx_analysis, genes, prefix, rep, pcs):
    """Write one expression-coloured scatter plot per gene.

    Args:
        rdata: Path passed to ``SingleCellExperiment.fromRData``.
        tenx_analysis: Argument for the ``TenxAnalysis`` constructor.
        genes: Iterable of genes; each yields
            ``figures/expression/expression_<gene>.png``.
        prefix: Unused; kept for interface compatibility.
        rep: Reduced-dimension name passed to ``sce.getReducedDims``.
        pcs: Unused; kept for interface compatibility.
    """
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    sce = SingleCellExperiment.fromRData(rdata)
    rep_dims = sce.getReducedDims(rep)
    barcodes = sce.colData["Barcode"]
    transcripts = sce.rowData["Symbol"]
    adata = tenx.create_scanpy_adata(barcodes=barcodes,
                                     transcripts=transcripts)
    x_coded = dict(zip(barcodes, rep_dims[0]))
    y_coded = dict(zip(barcodes, rep_dims[1]))
    # exist_ok avoids the check-then-create race.
    os.makedirs("figures/expression", exist_ok=True)
    # Coordinates are identical for every gene, so compute them once.
    x = [x_coded[barcode] for barcode in barcodes]
    y = [y_coded[barcode] for barcode in barcodes]
    for gene in genes:
        expression = [float(adata[barcode, gene].X) for barcode in barcodes]
        f, ax = plt.subplots(figsize=(10, 8))
        sns.scatterplot(x=x, y=y, hue=expression, alpha=0.85)
        ax.set_title("{} Counts".format(gene))
        ax.legend()
        plt.tight_layout()
        plt.savefig("figures/expression/expression_{}.png".format(gene))
        # pyplot keeps every figure alive until it is closed; without this
        # a long gene list accumulates open figures.
        plt.close(f)
Exemple #14
0
 def test_assay_names_rdata(self):
     """Every assay in the example experiment is one of the known names."""
     expected_assays = ['BatchCellMeans', 'BaseCellMeans', 'BCV', 'CellMeans', 'TrueCounts', 'counts']
     sce_path = os.path.join(base_dir, "tests/example_sce.RData")
     experiment = SingleCellExperiment.fromRData(sce_path)
     # Snapshot the names first, then check each one.
     found = list(experiment.assays.keys())
     for name in found:
         self.assertTrue(name in expected_assays)
Exemple #15
0
def umap_by_cluster(rdata, cluster_labels, sampleid, directory):
    """Plot the UMAP embedding by cluster into ``directory``.

    Loads the experiment from ``rdata``, reshapes its ``"UMAP"`` reduced
    dimensions to two rows (one value per barcode in each) and hands them to
    ``reduced_dims_by_cluster`` with the target file
    ``<directory>/umap_by_cluster.png``. ``sampleid`` is not used.
    """
    experiment = SingleCellExperiment.fromRData(rdata)
    raw_umap = experiment.reducedDims["UMAP"]
    cell_barcodes = experiment.colData["Barcode"]
    coords = numpy.array(raw_umap).reshape(2, len(cell_barcodes))
    target = os.path.join(directory, "umap_by_cluster.png")
    reduced_dims_by_cluster(cluster_labels, coords, cell_barcodes, target,
                            "UMAP")
Exemple #16
0
 def test_cell_assign_em(self):
     """Run CellAssign EM on the test experiment (smoke test, no asserts)."""
     sce_path = os.path.join(base_dir, "tests/cell_assign_test.RData")
     experiment = SingleCellExperiment.fromRData(sce_path)
     assigner = CellAssign()
     marker_genes = [
         "Gene161", "Gene447", "Gene519", "Gene609", "Gene677", "Gene750",
         "Gene754", "Gene860", "Gene929", "Gene979"
     ]
     groups = ["Groups1", "Groups2"]
     marker_matrix = GeneMarkerMatrix(genes=marker_genes, cells=groups)
     res = assigner.run_em(experiment, marker_matrix)
Exemple #17
0
def scvis_by_cluster_markers(rdata, tenx_analysis, prefix, pcs,
                             embedding_file):
    """Compute cluster markers on the SCVIS representation, best effort.

    Args:
        rdata: Path passed to ``SingleCellExperiment.fromRData``.
        tenx_analysis: Argument for the ``TenxAnalysis`` constructor.
        prefix: Unused; kept for interface compatibility.
        pcs: Forwarded to ``markers_by_clusters``.
        embedding_file: Forwarded to ``markers_by_clusters``.

    Returns:
        None. Any failure is logged with its traceback and otherwise
        ignored, so callers are never interrupted by this step.
    """
    import logging

    try:
        tenx = TenxAnalysis(tenx_analysis)
        tenx.load()
        sce = SingleCellExperiment.fromRData(rdata)
        tenx.markers_by_clusters(
            sce, rep="SCVIS", pcs=pcs, embedding_file=embedding_file)
    except Exception:
        # Deliberately best effort, but leave a trace instead of hiding the
        # failure completely.
        logging.getLogger(__name__).exception(
            "SCVIS cluster marker computation failed for %s", rdata)
    return None
Exemple #18
0
def cluster_markers(rdata, tenx_analysis, rep, pcs, embedding_file, prefix):
    """Plot marker genes for every cluster.

    Markers come from ``markers_by_clusters``; the ``names`` table under
    ``"rank_genes_groups"`` is transposed with ``zip(*...)`` and each
    resulting tuple is passed to ``plot_by_markers`` under the label
    ``"Cluster <i> <prefix>"``.
    """
    analysis = TenxAnalysis(tenx_analysis)
    analysis.load()
    experiment = SingleCellExperiment.fromRData(rdata)
    # NOTE(review): markers are always computed with rep="PCA" even though a
    # ``rep`` argument is accepted (it is only used for plotting) - confirm.
    ranked = analysis.markers_by_clusters(experiment, rep="PCA", pcs=pcs)
    names_table = ranked["rank_genes_groups"]["names"]
    per_cluster = list(zip(*names_table))
    for index, cluster_genes in enumerate(per_cluster):
        label = "Cluster {} {}".format(index, prefix)
        plot_by_markers(rdata, tenx_analysis, cluster_genes, label, rep, pcs,
                        embedding_file)
Exemple #19
0
 def test_save_and_load_rdata(self):
     """Round-trip an experiment through ``save`` and ``fromRData``.

     Prints progress markers and finally the shape of the reloaded counts
     assay. Nothing is asserted.
     """
     saved_path = "tests/sce_1.rdata"
     print("Reading")
     reader = DropletUtils()
     rs4 = reader.read10xCounts("/home/ceglian/data/raw_gene_bc_matrices/hg19/")
     experiment = SingleCellExperiment.fromRS4(rs4)
     print("Writing")
     experiment.save(saved_path)
     print("Loading...")
     reloaded = SingleCellExperiment.fromRData(saved_path)
     print(reloaded.assays["counts"].shape)
Exemple #20
0
 def test_expression_normalization(self):
     """CPM, TPM and FPKM results keep the shape of the counts assay."""
     sce_path = os.path.join(base_dir, "tests/example_sce.RData")
     experiment = SingleCellExperiment.fromRData(sce_path)
     normalizer = TenX()
     counts = experiment.assays["counts"]
     # Compute all three first, then check them in the same order.
     normalized = (
         normalizer.calculateCPM(counts),
         normalizer.calculateTPM(counts),
         normalizer.calculateFPKM(counts),
     )
     for matrix in normalized:
         assert matrix.shape == counts.shape
Exemple #21
0
def exportRData(rdata, directory, delim="\t"):
    """Export an experiment to plain-text files in ``directory``.

    Args:
        rdata: Path passed to ``SingleCellExperiment.fromRData``.
        directory: Output directory; created if missing.
        delim: Field separator used for the per-assay files.

    Writes ``meta.txt`` with the string forms of ``sizeFactors`` and
    ``reducedDims``, plus one ``<assay>.csv`` per entry of
    ``sce.assayNames``. Each assay file name is printed as it is written.
    """
    # exist_ok avoids the check-then-create race.
    os.makedirs(directory, exist_ok=True)
    sce = SingleCellExperiment.fromRData(rdata)
    # The handle used to be left open, so meta.txt was only flushed when the
    # object happened to be garbage collected.
    with open(os.path.join(directory, "meta.txt"), "w") as output:
        output.write("sizeFactors: " + str(sce.sizeFactors) + "\n")
        output.write("reducedDims: " + str(sce.reducedDims) + "\n")
    for assay in sce.assayNames:
        filename = os.path.join(directory, "{}.csv".format(assay))
        print(filename)
        dataframe = sce.assay(assay)
        dataframe.to_csv(filename, sep=delim)
Exemple #22
0
 def test_clone_align(self):
     """CloneAlign on the example experiment yields 200 clone assignments.

     Copy-number input is assembled from row-data columns "A", "B" and "C",
     stacked and transposed; its shape is printed before the run.
     """
     sce_path = os.path.join(base_dir, "tests/example_sce.rda")
     experiment = SingleCellExperiment.fromRData(sce_path)
     row_data = experiment.rowData
     cnv_columns = [row_data[name] for name in ["A", "B", "C"]]
     cnv_matrix = numpy.transpose(numpy.array(cnv_columns))
     print(cnv_matrix.shape)
     aligner = CloneAlign()
     outcome = aligner.run(experiment, cnv_matrix)
     assert len(outcome["clone"]) == 200
Exemple #23
0
def tsne_by_cell_type(rdata, fit, sampleid, directory, known_types=None):
    """Plot the TSNE embedding by cell type into ``directory``.

    Builds a barcode -> cell type mapping from ``fit["Barcode"]`` and
    ``fit["cell_type"]``, reshapes the ``"TSNE"`` reduced dimensions to two
    rows (one value per barcode in each) and delegates to
    ``reduced_dims_by_cell_type`` with the target file
    ``<directory>/tsne_by_cell_type.png``. ``sampleid`` is not used.
    """
    experiment = SingleCellExperiment.fromRData(rdata)
    raw_tsne = experiment.reducedDims["TSNE"]
    cell_barcodes = experiment.colData["Barcode"]
    type_by_barcode = dict(zip(fit["Barcode"], fit["cell_type"]))
    coords = numpy.array(raw_tsne).reshape(2, len(cell_barcodes))
    target = os.path.join(directory, "tsne_by_cell_type.png")
    reduced_dims_by_cell_type(type_by_barcode, coords, cell_barcodes, target,
                              "TSNE", known_types=known_types)
Exemple #24
0
def IntegratedSummary(sce, sampleid, report):
    """Build (or reuse) the JSON summary for a sample and copy it to ``report``.

    Args:
        sce: Path passed to ``SingleCellExperiment.fromRData``.
        sampleid: Sample identifier; the summary is cached at
            ``viz/<sampleid>.json`` and only rebuilt when that file is
            missing.
        report: Destination path the summary is copied to.

    The summary holds the column data, row data, non-zero log counts keyed
    by barcode then symbol, the barcode -> cell type mapping, integer UMAP
    coordinates, and the marker list read from ``config.rho_matrix``.
    """
    # exist_ok avoids the check-then-create race.
    os.makedirs("viz", exist_ok=True)
    os.makedirs("viz/html/", exist_ok=True)
    summary_path = "viz/{}.json".format(sampleid)
    if not os.path.exists(summary_path):
        sce = SingleCellExperiment.fromRData(sce)
        column_data = dump_all_coldata(sce)
        patient_data = collections.defaultdict(dict)
        patient_data[sampleid]["celldata"] = column_data
        gene_data = dump_all_rowdata(sce)
        patient_data[sampleid]["genedata"] = gene_data
        logcounts = sce.assays["logcounts"].todense().tolist()
        # Keep only non-zero entries to store the matrix sparsely.
        log_count_matrix = collections.defaultdict(dict)
        for symbol, row in zip(gene_data["Symbol"], logcounts):
            for barcode, cell in zip(column_data["Barcode"], row):
                if float(cell) != 0.0:
                    log_count_matrix[barcode][symbol] = cell
        patient_data[sampleid]["log_count_matrix"] = dict(log_count_matrix)
        rdims = sce.reducedDims["UMAP"]
        barcodes = sce.colData["Barcode"]
        # The stored embedding is reshaped to two rows: x values, then y.
        rdims = numpy.array(rdims).reshape(2, len(barcodes))
        celltypes = []
        for celltype in sce.colData["cell_type"]:
            # Dots in the stored names are turned back into readable labels.
            if celltype == "Monocyte.Macrophage":
                celltype = "Monocyte/Macrophage"
            else:
                celltype = celltype.replace(".", " ")
            celltypes.append(celltype)
        fit = dict(zip(barcodes, celltypes))
        x_coded = dict(zip(barcodes, rdims[0]))
        y_coded = dict(zip(barcodes, rdims[1]))
        coords = dict()
        for barcode in fit:
            try:
                # NOTE(review): coordinates are truncated to int - confirm
                # this is intended for the viewer.
                x_val = int(x_coded[barcode])
                y_val = int(y_coded[barcode])
            except (KeyError, TypeError, ValueError, OverflowError):
                # Missing or non-finite coordinate: leave the cell out.
                continue
            coords[barcode] = (x_val, y_val)
        patient_data[sampleid]["cellassign"] = fit
        patient_data[sampleid]["umap"] = coords
        patient_data["rho"] = GeneMarkerMatrix.read_yaml(
            config.rho_matrix).marker_list
        # Serialize before opening so a failure does not leave an empty
        # cache file that later runs would reuse.
        patient_data_str = json.dumps(patient_data)
        with open(summary_path, "w") as output:
            output.write(patient_data_str)
    shutil.copyfile(summary_path, report)
Exemple #25
0
def cell_type_by_cluster(rdata, cell_assign_fit, tenx_analysis, prefix):
    """Bar-plot the cell-type composition of every cluster.

    Args:
        rdata: Path passed to ``SingleCellExperiment.fromRData``.
        cell_assign_fit: Path to a pickle providing ``"Barcode"`` and
            ``"cell_type"`` sequences.
        tenx_analysis: Argument for the ``TenxAnalysis`` constructor.
        prefix: Used in the plot title.

    For each cluster, the fraction of its cells assigned to each cell type
    is plotted. Cells without a cluster label are ignored. The figure is
    written to ``figures/cell_type_by_cluster.png``.
    """
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    # NOTE: pickle executes arbitrary code on load; only use trusted fits.
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)
    cell_types = dict(zip(fit["Barcode"], fit["cell_type"]))
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce)
    if isinstance(cluster_labels, dict):
        # Already keyed by barcode; zipping a dict with the barcodes would
        # pair each barcode with another barcode instead of its cluster.
        clusters = cluster_labels
    else:
        clusters = dict(zip(sce.colData["Barcode"], cluster_labels))

    # counts[cluster][cell type] -> number of cells.
    counts = collections.defaultdict(lambda: collections.defaultdict(int))
    # Distinct cell types in first-seen order (dict keeps insertion order).
    seen_cell_types = {}
    for barcode, cell in cell_types.items():
        try:
            cluster_id = str(clusters[barcode])
        except KeyError:
            continue
        counts[cluster_id][cell] += 1
        seen_cell_types.setdefault(cell, None)

    f, ax = plt.subplots(figsize=(16, 8))
    fclusters = []
    fcelltypes = []
    fpercentages = []
    for cluster_id, per_type in counts.items():
        total = float(sum(per_type.values()))
        # One row per (cluster, cell type); this used to emit one row per
        # cell, producing many duplicate rows with identical values.
        for cell in seen_cell_types:
            fclusters.append(cluster_id)
            fcelltypes.append(cell)
            fpercentages.append(per_type.get(cell, 0) / total)
    df = pandas.DataFrame({
        "Cluster": fclusters,
        "Cell Type": fcelltypes,
        "Percentage": fpercentages
    })
    ax = sns.barplot(x="Cluster",
                     y="Percentage",
                     hue="Cell Type",
                     data=df,
                     palette="tab10")
    ax.set_title("Cell Type by Cluster - {}".format(prefix))
    plt.tight_layout()
    plt.savefig("figures/cell_type_by_cluster.png")
Exemple #26
0
 def create_input_files(rdata, components, output):
     """Write the leading PCA block of an experiment to ``matrix.tsv``.

     The ``"PCA"`` reduced dimensions are cut into consecutive slices of
     ``components`` entries; only the first slice is written, tab-separated,
     with a ``PC_0 .. PC_<components-1>`` header. The shape of the written
     array is printed.

     Returns:
         Path of the written file, ``<output>/matrix.tsv``.
     """
     experiment = SingleCellExperiment.fromRData(rdata)
     embedding = experiment.getReducedDims("PCA", n=components)
     slices = [
         embedding[start:start + (components)]
         for start in range(0, len(embedding), components)
     ]
     matrix = numpy.array(slices[0])
     print(matrix.shape)
     header = "\t".join("PC_{}".format(index) for index in range(components))
     filename = os.path.join(output, "matrix.tsv")
     numpy.savetxt(filename, matrix, delimiter="\t", header=header)
     return filename
Exemple #27
0
 def qcd_sce(self):
     """Load the experiment stored at ``self.qcdrdata``.

     The file is not generated here; the commented-out code below is an
     earlier, disabled attempt to build it on demand.
     """
     # if not os.path.exists(self.qcdrdata):
     #     qc = QualityControl(self)
     #     qc.build()
     #     qc.filter()
     #
     #     # TenX.read10xCountsFiltered(self,self.rdata)
     #     # rscript = ScaterCode(self.directory).generate_script()
     #     # cwd = os.getcwd()
     #     # os.chdir(self.directory)
     #     # print(os.getcwd())
     #     # cmd = ["Rscript",os.path.split(rscript)[-1],self.rdata,self.qcdrdata]
     #     # subprocess.call(cmd)
     #     # os.chdir(cwd)
     # print (self.qcdrdata)
     return SingleCellExperiment.fromRData(self.qcdrdata)
Exemple #28
0
def umap_by_cluster(rdata, tenx, prefix, pcs):
    """Scatter-plot the UMAP embedding coloured by cluster.

    Cluster labels come from ``tenx.clusters(sce, pcs=pcs)`` and are looked
    up by barcode in the ``"UMAP"`` reduced dimensions, which are reshaped
    to two rows (x values, then y values). The figure is written to
    ``<prefix>/umap_by_cluster.png``.
    """
    experiment = SingleCellExperiment.fromRData(rdata)
    labels_by_barcode = tenx.clusters(experiment, pcs=pcs)
    raw_umap = experiment.reducedDims["UMAP"]
    cell_barcodes = experiment.colData["Barcode"]
    coords = numpy.array(raw_umap).reshape(2, len(cell_barcodes))
    x_lookup = dict(zip(cell_barcodes, coords[0]))
    y_lookup = dict(zip(cell_barcodes, coords[1]))
    xs, ys, hues = [], [], []
    for barcode, cluster in labels_by_barcode.items():
        hues.append("Cluster {}".format(cluster))
        xs.append(x_lookup[barcode])
        ys.append(y_lookup[barcode])
    figure, axes = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=xs, y=ys, hue=hues, alpha=0.85)
    axes.set_title("UMAP - Clusters - {}".format(prefix))
    axes.legend()
    plt.tight_layout()
    plt.savefig("{}/umap_by_cluster.png".format(prefix))
Exemple #29
0
def marker_analysis(sce, tenx, rho, cell_assign_fit, figure):
    """Draw dot and stacked-violin plots of marker genes per cell type.

    Args:
        sce: Path passed to ``SingleCellExperiment.fromRData``.
        tenx: Object providing ``gene_map`` and ``create_scanpy_adata``.
        rho: Mapping whose values are lists of marker genes.
        cell_assign_fit: Path to a pickle providing ``"Barcode"`` and
            ``"cell_type"`` sequences.
        figure: Unused; kept for interface compatibility.

    Returns:
        The fixed list ``["dot_plot.png", "stacked_violinvin_stacked.png"]``.
        NOTE(review): these names are hard-coded rather than taken from
        scanpy - confirm they match the files it actually writes.
    """
    sce = SingleCellExperiment.fromRData(sce)
    # NOTE: pickle executes arbitrary code on load; only use trusted fits.
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)
    gene_markers = []
    for markers in rho.values():
        gene_markers += markers
    _marker_genes = list(set(gene_markers))
    convert = tenx.gene_map(sce)
    marker_genes = []
    for gene in _marker_genes:
        # Fall back to the original name when no conversion is known.
        if gene in convert:
            marker_genes.append(convert[gene])
        else:
            marker_genes.append(gene)
            print('No conversion for ', gene)
    print(marker_genes)
    adata = tenx.create_scanpy_adata(sce, fast_load=False)
    print(len(adata.obs.index))
    print(len(fit["cell_type"]))
    _cell_types = dict(zip(fit["Barcode"], fit["cell_type"]))
    # Cells missing from the fit are grouped under "Other".
    cell_types = [
        _cell_types.get(barcode, "Other") for barcode in adata.obs.index
    ]
    adata.obs["Cell Type"] = cell_types
    print(len(cell_types))
    # Only genes present in the AnnData variables can be plotted.
    marker_genes = list(set(marker_genes).intersection(set(adata.var.index)))
    print(len(marker_genes))
    print(marker_genes)
    sc.pl.dotplot(adata, marker_genes, groupby='Cell Type', save="matrix.png")
    sc.pl.stacked_violin(adata,
                         marker_genes,
                         groupby='Cell Type',
                         rotation=90,
                         save="vin_stacked.png")
    return ["dot_plot.png", "stacked_violinvin_stacked.png"]
Exemple #30
0
def create_workflow():
    """Assemble the analysis workflow and return it.

    Reads its settings from ``args`` and ``config``, which are not defined
    in this function (presumably module-level; verify). Depending on the
    ``config`` flags it wires aggregation, QC, CellAssign, SCVis,
    CloneAlign, cluster plots and report export onto a pypeliner workflow,
    collecting plot paths in a ``Results`` object along the way.

    Returns:
        The workflow obtained from ``secondary_analysis.get_workflow()``.
    """

    workflow = pypeliner.workflow.Workflow()

    bcl_directory = args.get("bcl", None)
    fastq_directories = args.get("fastqs")
    aggregate = args.get("aggregate_mlibs", list())
    agg_type = args.get("agg_method", "scanorama")
    libbase = args.get("lib_base", None)
    # NOTE(review): ``additional`` and ``recipe`` are read but never used below.
    additional = args.get("additional", [])
    prefix = config.prefix
    output = config.jobpath
    recipe = args.get("recipe", "basic")

    # Best-effort creation of the output folder; any failure (including
    # "already exists") is ignored.
    try:
        cellranger_folder = os.path.join(output, prefix)
        os.makedirs(cellranger_folder)
    except Exception as e:
        pass

    if fastq_directories == None:
        fastq_directories = []

    results = Results(output)
    runner = PrimaryRun(workflow, prefix, output)
    """
    Aggregating Libraries
    """

    if aggregate != None and len(aggregate) > 0:
        if agg_type == "tenx":
            runner.aggregate_libraries_tenx(aggregate, libbase)
            # Point later steps at the aggregated run's output.
            args["tenx"] = os.path.join(output, "run_{}/outs".format(prefix))
        if agg_type == "scanorama":
            runner.aggregate_libraries_scanorama()
    """
    Setup
    """
    tenx_analysis = args.get("tenx", None)

    bcls = runner.set_bcl(bcl_directory)
    fastqs = runner.set_fastq(fastq_directories)
    workflow = runner.get_workflow()

    # NOTE(review): same lookup as a few lines above; one of them is redundant.
    tenx_analysis = args.get("tenx", None)

    # When fastqs were set, the analysis directory is the job's own "outs".
    if fastqs != []:
        tenx_analysis = os.path.join(config.jobpath, prefix, "outs")

    rdata = args.get("rdata", None)

    secondary_analysis = SecondaryAnalysis(workflow, prefix, output)
    tenx = TenxAnalysis(tenx_analysis)
    """
    QC
    """

    secondary_analysis.run_scater()
    secondary_analysis.build_sce(tenx)
    secondary_analysis.set_rdata(rdata)

    results.add_analysis(tenx_analysis)
    results.add_workflow(secondary_analysis.rscript)
    results.add_sce(secondary_analysis.sce)

    umi = os.path.join(output, "figures/umi_distribution.png")
    mito = os.path.join(output, "figures/mito_distribution.png")
    ribo = os.path.join(output, "figures/ribo_distribution.png")
    freq = os.path.join(output, "figures/highestExprs.png")
    tech = os.path.join(output, "figures/mean_variance_trend.png")
    high_var = os.path.join(output, "figures/highly_variable_genes.png")

    results.add_plot(umi, "UMI Distribution")
    results.add_plot(mito, "Mito Distribution")
    results.add_plot(ribo, "Ribo Distribution")
    results.add_plot(freq, "Highest Frequency")
    results.add_plot(tech, "Mean Variance Trend")
    results.add_plot(high_var, "Highly Variable Genes")

    results.add_cellassign_pkl(secondary_analysis.cell_assign_fit)
    results.add_cellassign_raw(secondary_analysis.cell_assign_rdata)
    """
    Differential Expression
    """
    if config.run_de:
        other_samples = []
        # NOTE(review): ``compare`` is not defined in this function, and the
        # loop exits the process on its first iteration, so ``run_de`` is
        # never reached - looks like leftover debugging.
        for other_sample in compare:
            print("blah")
            exit(0)
            secondary_analysis.run_de(other_sample)
    """
    CellAssign
    """
    if config.run_cellassign:
        tenx = TenxAnalysis(tenx_analysis)
        if hasattr(config, "rho_matrix"):
            # NOTE(review): eval() on file contents executes arbitrary code;
            # only safe if the rho matrix file is fully trusted.
            rho_matrix = eval(open(config.rho_matrix, "r").read())
        elif hasattr(config, "tissue"):
            sce = SingleCellExperiment.fromRData(secondary_analysis.sce)
            # NOTE(review): the branch tests for ``config.tissue`` but reads
            # ``config.organ`` - confirm which attribute is intended.
            rho_matrix = generate_json(tenx, sce, config.organ)
        else:
            raise AssertionError("Not implemented.")
        # NOTE(review): ``combine_assign`` is not defined in this function;
        # the local ``additional`` read above is unused.
        secondary_analysis.run_cell_assign(rho_matrix,
                                           tenx_analysis,
                                           additional=combine_assign)
        results.add_cellassign_pkl(secondary_analysis.cell_assign_fit)
        results.add_cellassign_raw(secondary_analysis.cell_assign_rdata)

        path = secondary_analysis.plot_cell_types()
        results.add_plot(path, "Cell Type Frequency")
        path = secondary_analysis.plot_cell_type_by_cluster(tenx_analysis)
        results.add_plot(path, "Cell Type by Cluster")

        path = secondary_analysis.plot_tsne_by_cell_type()
        results.add_plot(path, "TSNE by Cell Type")

        path = secondary_analysis.plot_pca_by_cell_type()
        results.add_plot(path, "PCA by Cell Type")

        # path = secondary_analysis.plot_umap_by_cell_type()
        # results.add_plot(path, "UMAP by Cell Type")

        path1, path2 = secondary_analysis.marker_analysis(tenx, rho_matrix)
        results.add_plot(path1, "Heat Marker Gene Matrix")
        results.add_plot(path2, "Stacked Vin Marker Gene Matrix")
    """
    SCVis
    """
    if config.run_scvis:
        secondary_analysis.run_scviz(config.perplexity, config.components)
    """
    CloneAlign
    """
    if config.run_clonealign and config.copy_number_data is not None and config.clone_assignments is not None:
        secondary_analysis.run_clone_align(tenx, config.copy_number_data,
                                           config.clone_assignments)

    if config.plot_scvis:
        # Embedding path built from the perplexity and component settings;
        # the remaining hyper-parameters in the name are fixed.
        embedding_file = "{0}_{1}/perplexity_{0}_regularizer_0.001_batch_size_512_learning_rate_0.01_latent_dimension_2_activation_ELU_seed_1_iter_3000.tsv".format(
            config.perplexity, config.components)
        path = secondary_analysis.plot_scvis_by_cluster(tenx_analysis,
                                                        embedding_file,
                                                        pcs=config.components)
        path = os.path.join(output, path)
        results.add_plot(path, "SCVis by Cluster")

        # NOTE(review): ``config.run_cellassign`` is used as a flag above but
        # as a filesystem path here - confirm.
        if os.path.exists(config.run_cellassign):
            path = secondary_analysis.plot_scvis_by_cell_type(
                embedding_file, pcs=config.components)
            results.add_plot(path, "SCVIS by Cell Type")
    """
    Cluster Analysis
    """
    if config.clustering:
        path = secondary_analysis.plot_pca_by_cluster(tenx_analysis,
                                                      pcs=config.components)
        results.add_plot(path, "PCA by Cluster")

        path = secondary_analysis.plot_tsne_by_cluster(tenx_analysis,
                                                       pcs=config.components)
        results.add_plot(path, "TSNE by Cluster")

        path = secondary_analysis.plot_umap_by_cluster(tenx_analysis,
                                                       pcs=config.components)
        results.add_plot(path, "UMAP by Cluster")

        # For each representation: schedule the marker plots, then register
        # whatever matching PNGs already exist, deriving a title from each
        # file name.
        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="PCA",
                                                pcs=config.components)

        pca_cluster_markers = glob.glob("figures/expression/*pca*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)

        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="TSNE",
                                                pcs=config.components)

        pca_cluster_markers = glob.glob("figures/expression/*tsne*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)

        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="UMAP",
                                                pcs=config.components)

        pca_cluster_markers = glob.glob("figures/expression/*umap*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)

        embedding_file = "{0}_{1}/perplexity_{0}_regularizer_0.001_batch_size_512_learning_rate_0.01_latent_dimension_2_activation_ELU_seed_1_iter_3000.tsv".format(
            config.perplexity, config.components)
        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="SCVIS",
                                                pcs=config.components,
                                                embedding_file=embedding_file)

        # NOTE(review): the pattern hard-codes "5_50" rather than using
        # config.perplexity / config.components - confirm.
        pca_cluster_markers = glob.glob("figures/expression/*scvis_5_50*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)
    """
    Gene Level
    """
    """
    Reporting
    """
    if config.report:
        workflow.transform(name="{}_markdown".format(prefix),
                           func=exportMD,
                           args=(results, ))

    # Same condition as above; kept as a separate step registration.
    if config.report:
        workflow.transform(name="{}_finalize".format(prefix),
                           func=exportFinalize,
                           args=(results, ))

    workflow = secondary_analysis.get_workflow()
    return workflow