コード例 #1
0
def RunKallisto(sample_to_path, bus_output):
    """Run Kallisto for the sample described in a JSON file and record the result.

    Args:
        sample_to_path: Path to a JSON file containing an object that maps a
            sample id to a path. If the object has several entries, the last
            one is used.
        bus_output: Path of the file that receives the value returned by
            ``Kallisto.count()``.
    """
    # Context managers guarantee both files are closed, even when parsing or
    # the Kallisto run raises.
    with open(sample_to_path, "r") as handle:
        sample = json.load(handle)
    sampleid, path = list(sample.items()).pop()
    fastq_directory = FastQDirectory(sampleid, config.prefix, config.jobpath, path)
    krunner = Kallisto(fastq_directory, sampleid)
    bus_path = krunner.count()
    with open(bus_output, "w") as handle:
        handle.write(bus_path)
コード例 #2
0
 def mkfastq(bcl_object):
     """Run the ``mkfastq`` command for a BCL object.

     The ``id``, ``path`` and ``csv`` attributes of ``bcl_object`` are passed
     to ``CellRanger.cmd`` as the ``id``, ``run`` and ``csv`` arguments. The
     resulting command is executed with ``subprocess.call``; its exit status
     is not inspected.

     Returns:
         A ``FastQDirectory`` built from ``bcl_object.out()``.
     """
     mkfastq_args = {
         "id": bcl_object.id,
         "run": bcl_object.path,
         "csv": bcl_object.csv,
     }
     command = CellRanger.cmd("mkfastq", mkfastq_args)
     subprocess.call(command)
     return FastQDirectory(bcl_object.out())
コード例 #3
0
def main():
    """Run a Kallisto count on a hard-coded sample and print what it returns."""
    fastq_directory = FastQDirectory("/data/AHT52JBGXB/", "TENX065", "./")
    print(Kallisto(fastq_directory).count())
コード例 #4
0
 def set_fastq(self, fastq_directories):
     """Register FastQ directories and schedule a CellRanger count for them.

     Each entry is wrapped in a ``FastQDirectory``; only those whose
     ``check_status()`` is falsy are kept in ``self.fastqs`` (presumably the
     ones that still need processing -- confirm against ``FastQDirectory``).
     When at least one is kept, a ``<prefix>_cellranger_counts`` transform is
     added to ``self.workflow``.

     Args:
         fastq_directories: Iterable of FastQ directory descriptors.

     Returns:
         The list stored in ``self.fastqs``.
     """
     print(fastq_directories)
     self.fastq_directories = fastq_directories
     self.fastqs = []
     for directory in self.fastq_directories:
         candidate = FastQDirectory(
             directory, self.prefix, self.output, datapath=config.datapath
         )
         if candidate.check_status():
             continue
         self.fastqs.append(candidate)

     # Nothing pending: report it and leave the workflow untouched.
     if not self.fastqs:
         print("No FastQ to run.")
         return self.fastqs

     self.workflow.transform(
         name="{}_cellranger_counts".format(self.prefix),
         func=CellRanger.count,
         args=(self.fastqs,),
     )
     return self.fastqs
コード例 #5
0
def RunKallisto(sampleid, finished):
    """Run Kallisto for ``sampleid`` and write a completion marker file.

    Args:
        sampleid: Sample identifier passed to ``FastQDirectory`` and
            ``Kallisto``.
        finished: Path of the marker file; it receives the text
            ``"Completed"`` once ``Kallisto.count()`` has returned.
    """
    fastq_directory = FastQDirectory(sampleid, config.prefix, config.jobpath,
                                     config.datapath)
    krunner = Kallisto(fastq_directory, sampleid)
    # The returned path is only needed by the disabled post-processing below.
    krunner.count()
    # tenx = TenxAnalysis(tenx_path)
    # tenxds = TenxDataStorage(sampleid, species=species)
    # tenx.bus_finalize()
    # The context manager flushes and closes the marker file deterministically.
    with open(finished, "w") as marker:
        marker.write("Completed")
コード例 #6
0
def main():
    """Download a hard-coded sample, load its analysis and call ``Kallisto.de()``."""
    sample = "patient2"
    output = "/igo_large/scratch/test_kallisto"
    fastq_path = "/igo_large/scratch/allen/bams/xfastqs2/McGilvery_Sonya__TLH_MissingLibrary_1_CB8R9ANXX/"

    # Fetch the stored data first, then load the analysis from where it landed.
    storage = TenxDataStorage(sample, version="v2")
    storage.download()
    analysis = TenxAnalysis(storage.tenx_path)
    analysis.load()

    fastq_directory = FastQDirectory(fastq_path, sample, output)
    Kallisto(fastq_directory, analysis).de()
コード例 #7
0
 def count(fastqs):
     """Build and run the CellRanger ``count`` command for the given FastQ inputs.

     Args:
         fastqs: Iterable of FastQ descriptors; each one is wrapped in a
             ``FastQDirectory`` using the prefix, job path and data path from
             ``config``.

     The command's exit status is not inspected (``subprocess.call``).
     """
     print("Running Cellranger")
     fastqs = [
         FastQDirectory(fastq, config.prefix, config.jobpath, config.datapath)
         for fastq in fastqs
     ]
     args = {
         "id": "_".join(fastq.id for fastq in fastqs),
         "fastqs": ",".join(fastq.path for fastq in fastqs),
     }
     try:
         args["sample"] = ",".join(
             fastq.samples.sampleid[0] for fastq in fastqs
         )
     except Exception as error:
         # Best effort: the run proceeds without a sample argument, but the
         # reason is reported instead of being silently discarded.
         print("Omitting 'sample' argument: {!r}".format(error))
     args["transcriptome"] = config.reference
     if config.chemistry is not None:
         args["chemistry"] = config.chemistry
     cmd = CellRanger.cmd("count", args)
     print("Running ", " ".join(cmd))
     subprocess.call(cmd)
コード例 #8
0
 def count(fastqs, reference_override=False):
     """Write the CellRanger ``count`` command to a script and run it with bash.

     Args:
         fastqs: Iterable of FastQ descriptors; each one is wrapped in a
             ``FastQDirectory`` using the prefix, job path and data path from
             ``config``.
         reference_override: Optional transcriptome to use instead of
             ``config.reference``. When it contains ``"mm10"`` the script is
             named ``cellranger_mouse.sh`` and ``_mouse`` is appended to the
             run id; otherwise the script is ``cellranger_human.sh``.

     Raises:
         subprocess.CalledProcessError: If the generated script exits with a
             non-zero status (raised by ``subprocess.check_output``).
     """
     print("Running Cellranger")
     fastqs = [
         FastQDirectory(fastq, config.prefix, config.jobpath,
                        config.datapath) for fastq in fastqs
     ]
     args = {
         "id": "_".join(fastq.id for fastq in fastqs),
         "fastqs": ",".join(fastq.path for fastq in fastqs),
     }
     try:
         args["sample"] = ",".join(
             fastq.samples.sampleid[0] for fastq in fastqs)
     except Exception as error:
         # Best effort: the run proceeds without a sample argument, but the
         # reason is reported instead of being silently discarded.
         print("Omitting 'sample' argument: {!r}".format(error))

     args["transcriptome"] = reference_override or config.reference
     script = "cellranger_human.sh"
     if reference_override and "mm10" in args["transcriptome"]:
         script = "cellranger_mouse.sh"
         args["id"] += "_mouse"
     if config.chemistry is not None:
         args["chemistry"] = config.chemistry

     cmd = CellRanger.cmd("count", args)
     print("Saving command to submission script ", " ".join(cmd))
     # Open the script only once the command is known, and let the context
     # manager close it, so a failure above cannot leave a truncated file or
     # a leaked handle.
     with open(script, "w") as output:
         output.write("source /codebase/cellranger-3.0.2/sourceme.bash\n")
         output.write(" ".join(cmd) + "\n")
     result = subprocess.check_output(["bash", script])
     print("Cellranger exit: {}".format(result))
コード例 #9
0
ファイル: pipeline.py プロジェクト: huzheng16/SCRNApipeline
def create_workflow():
    """
    Build the Pypeliner workflow from the module-level ``args`` mapping.

    The workflow starts from the rawest input provided and skips upstream
    tasks when a later-stage input is given:

    * ``bcl``   -- BCL directory; adds the ``bcl_to_fastq`` task.
    * ``fastq`` -- FastQ directory; used directly for ``fastq_counts``
      instead of the ``bcl_to_fastq`` output.
    * ``tenx``  -- 10x analysis input; used directly for
      ``tenx_read10xcounts`` instead of the ``fastq_counts`` output.
    * ``rdata`` -- saved single cell experiment; skips
      ``tenx_read10xcounts`` entirely.

    The ``clonealign`` task is always added.

    Args:
        None

    Returns:
        Pypeliner workflow object.
    """

    bcl_directory = args.get("bcl", None)
    fastq_directory = args.get("fastq", None)
    tenx_analysis = args.get("tenx", None)
    rdata = args.get("rdata", None)

    # Previously bound to `bcl` while the transform below referenced the
    # undefined name `bcl_object`, raising NameError whenever a BCL directory
    # was supplied.
    bcl_object = BinaryBaseCall(bcl_directory)

    workflow = pypeliner.workflow.Workflow()

    if bcl_directory:
        workflow.transform(
            name="bcl_to_fastq",
            func=CellRanger.mkfastq,
            ret=pypeliner.managed.TempOutputObj("fastq_object"),
            args=(
                bcl_object,
            )
        )

    if bcl_directory is not None or fastq_directory is not None:
        if fastq_directory is not None:
            fastq = FastQDirectory(fastq_directory)
            # fastqs = list()
            # for sample_sheet in glob.iglob(os.path.join(fastq_directory, "**/*.csv")):
        else:
            fastq = pypeliner.managed.TempInputObj("fastq_object")
        workflow.transform(
            name="fastq_counts",
            func=CellRanger.count,
            ret=pypeliner.managed.TempOutputObj("tenx_analysis"),
            args=(
                fastq,
            )
        )

    # Counts are only read when no rdata file short-circuits this stage.
    tenx = None
    if tenx_analysis is not None and rdata is None:
        tenx = TenxAnalysis(tenx_analysis)
    elif tenx_analysis is None and rdata is None:
        tenx = pypeliner.managed.TempInputObj("tenx_analysis")
    if tenx is not None:
        workflow.transform(
            name="tenx_read10xcounts",
            func=TenX.read10xCounts,
            ret=pypeliner.managed.TempOutputObj("single_cell_experiment"),
            args=(
                tenx,
            )
        )

    if rdata is not None:
        single_cell_experiment = TenxAnalysis.from_rdata(rdata)
    else:
        single_cell_experiment = pypeliner.managed.TempInputObj("single_cell_experiment")

    # Disabled stages, kept for reference.
    #
    # workflow.transform (
    #     name = "tenx_barcoderanks",
    #     func = TenX.barcodeRanks,
    #     args = (
    #         pypeliner.managed.TempInputObj("tenx_analysis"),
    #     )
    # )
    #
    # workflow.transform (
    #     name = "tenx_emptydrops",
    #     func = TenX.emptyDrops,
    #     args = (
    #         pypeliner.managed.TempInputObj("tenx_analysis"),
    #     )
    # )

    workflow.transform(
        name="clonealign",
        func=CloneAlign.run,
        ret=pypeliner.managed.TempOutputObj("clone_align_fit"),
        args=(
            single_cell_experiment,
        )
    )

    # Disabled stages, kept for reference. The stray triple-quote that used
    # to follow this block opened an unterminated string and has been removed.
    #
    # workflow.transform (
    #     name = "cellasign",
    #     func = CellAssign.run_em,
    #     ret = pypeliner.managed.TempOutputObj("cell_assignments"),
    #     args = (
    #         single_cell_experiment,
    #     )
    # )
    #
    # workflow.transform (
    #     name = "scviz",
    #     func = SCViz.run,
    #     ret = pypeliner.managed.TempOutputObj("scviz_dim_reduction"),
    #     args = (
    #         single_cell_experiment,
    #     )
    # )
    #
    # workflow.transform (
    #     name = "html_output",
    #     func = HTMLResults.generate,
    #     args = (
    #         pypeliner.managed.TempInputObj("fastq_object")
    #         pypeliner.managed.TempInputObj("ten_analysis"),
    #         pypeliner.managed.TempInputObj("single_cell_experiment"),
    #         pypeliner.managed.TempInputObj("clone_align_fit"),
    #         pypeliner.managed.TempInputObj("cell_assignments"),
    #         pypeliner.managed.TempInputObj("scviz_dim_reduction"),
    #     )
    # )

    return workflow
コード例 #10
0
 def test_mapped_scvis(self):
     """Call ``SCViz.train`` with hard-coded local input paths."""
     fastq_path = "/Users/ceglian/share/MICHELLE_0065_AHGNCGDMXX/Project_06000_EJ/Sample_cDNA_Pre_IGO_06000_EJ_1/"
     project_dir = "/Users/ceglian/project_data/pre_65/"
     sce_rdata = "/Users/ceglian/project_data/pre_65/sce_final.rdata"
     clone_align_rdata = "/Users/ceglian/project_data/pre_65/clone_align_fit.rdata"

     fastq = FastQDirectory(fastq_path, "pre_65_0", project_dir)
     tenx = TenxAnalysis(fastq.results)
     SCViz.train(sce_rdata, tenx, clone_align_rdata)
コード例 #11
0
 def test_fastqc(self):
     """Call ``FastQC.run`` on a hard-coded FastQ directory, writing under ./tests."""
     fastq_path = "/Users/ceglian/input_data/MICHELLE_0065_AHGNCGDMXX/Project_06000_EJ/Sample_cDNA_Pre_IGO_06000_EJ_1/"
     output = "./tests/fastqc_test/"
     fqobj = FastQDirectory(fastq_path)
     FastQC().run(fqobj, output)
コード例 #12
0
def DownloadFastqs(sampleid, finished):
    """Check that FastQ files exist for a sample and write a marker file.

    Args:
        sampleid: Sample identifier passed to ``FastQDirectory``.
        finished: Path of the marker file; it receives the text
            ``"Completed"`` when at least one ``*.fastq.gz`` file is found
            directly under ``config.datapath``.

    Raises:
        AssertionError: If no ``*.fastq.gz`` file is found.
    """
    # Constructed only for its side effects; the instance itself is unused.
    # NOTE(review): presumably this is what fetches the files into
    # config.datapath -- confirm against FastQDirectory.
    FastQDirectory(sampleid, config.prefix, config.jobpath, config.datapath)
    fastqs = glob.glob(os.path.join(config.datapath, "*.fastq.gz"))
    # Raised explicitly (same exception type as before) so the check is not
    # stripped when Python runs with -O.
    if not fastqs:
        raise AssertionError("No Fastqs Download or Found.")
    with open(finished, "w") as marker:
        marker.write("Completed")
コード例 #13
0
            thresholds = (0.05, 0.01, 0.001)
            import collections
            sig_genes = collections.defaultdict(list)
            for gene, pvalue in sorted_genes:
                for threshold in thresholds:
                    if pvalue < threshold:
                        sig_genes[str(threshold)].append(gene)
            print("**************** Differential Genes ********************")
            for thresh, sig_genes in sig_genes.items():
                print(thresh, len(sig_genes))
            for gene, pvalue in sorted_genes[:100]:
                print(gene, pvalue)
        return sorted_genes


if __name__ == '__main__':
    from itertools import combinations

    # One FastQDirectory per sample, all sharing the same output location.
    samples = ["Y7640", "Y7652", "Y7668", "Y8841"]
    fastqs = {}
    for sample in samples:
        output = "/igo_large/scratch/de_ciara/"
        fastqs[sample] = FastQDirectory(
            "/igo_large/data/{}".format(sample), sample, output)

    # Run the logistic-regression comparison on every unordered sample pair.
    for pair in combinations(samples, 2):
        first, second = pair
        de = DifferentialExpression(pair, [fastqs[first], fastqs[second]])
        res = de.logistic_regression()