Example #1
0
def RunKallisto(sample_to_path, bus_output):
    """Run a Kallisto count for the sample described in a JSON file.

    Args:
        sample_to_path: Path to a JSON file holding an object that maps a
            sample id to a path. When the object has several entries, only
            the last one is used.
        bus_output: Path of the file that receives the value returned by
            ``Kallisto.count()``.

    Raises:
        IndexError: If the JSON object is empty.
    """
    # Context managers close both handles deterministically instead of
    # leaving them to the garbage collector.
    with open(sample_to_path, "r") as handle:
        sample = json.load(handle)
    sampleid, path = list(sample.items()).pop()
    fastq = FastQDirectory(sampleid, config.prefix, config.jobpath, path)
    bus_path = Kallisto(fastq, sampleid).count()
    with open(bus_output, "w") as handle:
        handle.write(bus_path)
 def mkfastq(bcl_object):
     """Run ``cellranger mkfastq`` for a BCL object and wrap its output.

     Args:
         bcl_object: Object exposing ``id``, ``path``, ``csv`` and ``out()``.

     Returns:
         A ``FastQDirectory`` built from ``bcl_object.out()``.
     """
     options = {
         "id": bcl_object.id,
         "run": bcl_object.path,
         "csv": bcl_object.csv,
     }
     # The exit status of the subprocess is not inspected.
     subprocess.call(CellRanger.cmd("mkfastq", options))
     return FastQDirectory(bcl_object.out())
Example #3
0
def main():
    """Run a Kallisto count on one hard-coded sample and print the result."""
    sample, fastq, output = "TENX065", "/data/AHT52JBGXB/", "./"

    runner = Kallisto(FastQDirectory(fastq, sample, output))
    print(runner.count())
Example #4
0
 def set_fastq(self, fastq_directories):
     """Register FASTQ directories and schedule a CellRanger count for them.

     Every entry is wrapped in a ``FastQDirectory``; only those whose
     ``check_status()`` is falsy are kept in ``self.fastqs``. When at least
     one is kept, a single ``CellRanger.count`` transform covering all of
     them is added to ``self.workflow``.

     Args:
         fastq_directories: Iterable of values handed to ``FastQDirectory``.

     Returns:
         The list stored in ``self.fastqs``.
     """
     print(fastq_directories)
     self.fastq_directories = fastq_directories
     self.fastqs = []
     for directory in self.fastq_directories:
         candidate = FastQDirectory(directory, self.prefix, self.output, datapath=config.datapath)
         if candidate.check_status():
             continue
         self.fastqs.append(candidate)

     if not self.fastqs:
         print("No FastQ to run.")
         return self.fastqs

     self.workflow.transform(
         name="{}_cellranger_counts".format(self.prefix),
         func=CellRanger.count,
         args=(self.fastqs,),
     )
     return self.fastqs
def RunKallisto(sampleid, finished):
    """Run a Kallisto count for one sample and write a completion marker.

    Args:
        sampleid: Sample identifier, passed to both ``FastQDirectory`` and
            ``Kallisto``.
        finished: Path of the marker file; it receives the text
            ``Completed`` once the count has returned.
    """
    fastq = FastQDirectory(sampleid, config.prefix, config.jobpath,
                           config.datapath)
    Kallisto(fastq, sampleid).count()
    # NOTE: post-processing through TenxAnalysis / TenxDataStorage
    # (``bus_finalize``) is currently disabled.
    # A context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
Example #6
0
def main():
    """Download one hard-coded sample, load it, and run Kallisto ``de()``."""
    sample = "patient2"

    storage = TenxDataStorage(sample, version="v2")
    storage.download()
    analysis = TenxAnalysis(storage.tenx_path)
    analysis.load()

    fastq_path = "/igo_large/scratch/allen/bams/xfastqs2/McGilvery_Sonya__TLH_MissingLibrary_1_CB8R9ANXX/"
    output = "/igo_large/scratch/test_kallisto"
    reads = FastQDirectory(fastq_path, sample, output)

    Kallisto(reads, analysis).de()
Example #7
0
 def count(fastqs):
     """Build and run a ``cellranger count`` command for the given inputs.

     Args:
         fastqs: Iterable of values, each wrapped in a ``FastQDirectory``
             together with ``config.prefix``, ``config.jobpath`` and
             ``config.datapath``.

     The exit status of the subprocess is not inspected.
     """
     print("Running Cellranger")
     fastqs = [FastQDirectory(fastq, config.prefix, config.jobpath, config.datapath) for fastq in fastqs]
     # Keys are inserted in the same order as before, in case
     # CellRanger.cmd depends on it.
     args = {
         "id": "_".join([fastq.id for fastq in fastqs]),
         "fastqs": ",".join([fastq.path for fastq in fastqs]),
     }
     try:
         args["sample"] = ",".join([fastq.samples.sampleid[0] for fastq in fastqs])
     except Exception as error:
         # Still best-effort (the sample argument is simply omitted), but
         # the reason is now reported instead of silently discarded.
         print("Could not derive sample ids, omitting sample argument: {}".format(error))
     args["transcriptome"] = config.reference
     if config.chemistry is not None:
         args["chemistry"] = config.chemistry
     cmd = CellRanger.cmd("count", args)
     print("Running ", " ".join(cmd))
     subprocess.call(cmd)
 def count(fastqs, reference_override=False):
     """Write a ``cellranger count`` submission script and run it with bash.

     Args:
         fastqs: Iterable of values, each wrapped in a ``FastQDirectory``
             together with ``config.prefix``, ``config.jobpath`` and
             ``config.datapath``.
         reference_override: Transcriptome to use instead of
             ``config.reference``; any falsy value keeps the configured one.

     Raises:
         subprocess.CalledProcessError: If the script exits non-zero.
     """
     print("Running Cellranger")
     fastqs = [
         FastQDirectory(fastq, config.prefix, config.jobpath,
                        config.datapath) for fastq in fastqs
     ]
     # Keys are inserted in the same order as before, in case
     # CellRanger.cmd depends on it.
     args = {
         "id": "_".join([fastq.id for fastq in fastqs]),
         "fastqs": ",".join([fastq.path for fastq in fastqs]),
     }
     try:
         args["sample"] = ",".join(
             [fastq.samples.sampleid[0] for fastq in fastqs])
     except Exception as error:
         # Still best-effort (the sample argument is simply omitted), but
         # the reason is now reported instead of silently discarded.
         print("Could not derive sample ids, omitting sample argument: {}".format(error))
     args["transcriptome"] = reference_override or config.reference

     # Only an explicit override selects the mouse script; a configured
     # reference containing "mm10" still uses the human script, as before.
     script = "cellranger_human.sh"
     if reference_override and "mm10" in args["transcriptome"]:
         script = "cellranger_mouse.sh"
         args["id"] += "_mouse"

     if config.chemistry is not None:
         args["chemistry"] = config.chemistry
     cmd = CellRanger.cmd("count", args)
     print("Saving command to submission script ", " ".join(cmd))
     # The script is opened only once the command is known, and the context
     # manager closes it even if a write fails.
     with open(script, "w") as output:
         output.write("source /codebase/cellranger-3.0.2/sourceme.bash\n")
         output.write(" ".join(cmd) + "\n")
     # check_output returns the captured stdout, not an exit status.
     result = subprocess.check_output(["bash", script])
     print("Cellranger exit: {}".format(result))
Example #9
0
def create_workflow():
    """
    Generates tasks as a Pypeliner workflow based on input arguments.

    The workflow starts from the most raw input provided; each later stage
    takes either the matching user-supplied input or the managed temporary
    object produced by the previous stage.

    Args:
        None. Inputs are read from the module-level ``args`` mapping under
        the keys "bcl", "fastq", "tenx" and "rdata".

    Returns:
        Pypeliner workflow object.
    """

    bcl_directory = args.get("bcl", None)
    fastq_directory = args.get("fastq", None)
    tenx_analysis = args.get("tenx", None)
    rdata = args.get("rdata", None)

    workflow = pypeliner.workflow.Workflow()

    if bcl_directory:
        # Build the BCL wrapper only when a directory was supplied, and pass
        # that object to the task (the previous code referenced an undefined
        # name here).
        bcl = BinaryBaseCall(bcl_directory)
        workflow.transform(
            name="bcl_to_fastq",
            func=CellRanger.mkfastq,
            ret=pypeliner.managed.TempOutputObj("fastq_object"),
            args=(
                bcl,
            )
        )

    if bcl_directory is not None or fastq_directory is not None:
        if fastq_directory is not None:
            # An explicit FASTQ directory takes precedence over mkfastq output.
            fastq = FastQDirectory(fastq_directory)
        else:
            fastq = pypeliner.managed.TempInputObj("fastq_object")
        workflow.transform(
            name="fastq_counts",
            func=CellRanger.count,
            ret=pypeliner.managed.TempOutputObj("tenx_analysis"),
            args=(
                fastq,
            )
        )

    if rdata is None:
        # Without a saved rdata file the single cell experiment is produced
        # from a 10x analysis: the user-supplied one, or the counts output.
        if tenx_analysis is not None:
            tenx = TenxAnalysis(tenx_analysis)
        else:
            tenx = pypeliner.managed.TempInputObj("tenx_analysis")
        workflow.transform(
            name="tenx_read10xcounts",
            func=TenX.read10xCounts,
            ret=pypeliner.managed.TempOutputObj("single_cell_experiment"),
            args=(
                tenx,
            )
        )
        single_cell_experiment = pypeliner.managed.TempInputObj("single_cell_experiment")
    else:
        single_cell_experiment = TenxAnalysis.from_rdata(rdata)

    # Disabled tasks, not yet wired in:
    #   tenx_barcoderanks -> TenX.barcodeRanks(tenx_analysis)
    #   tenx_emptydrops   -> TenX.emptyDrops(tenx_analysis)

    workflow.transform(
        name="clonealign",
        func=CloneAlign.run,
        ret=pypeliner.managed.TempOutputObj("clone_align_fit"),
        args=(
            single_cell_experiment,
        )
    )

    # Disabled downstream tasks, not yet wired in:
    #   cellasign   -> CellAssign.run_em(single_cell_experiment),
    #                  ret "cell_assignments"
    #   scviz       -> SCViz.run(single_cell_experiment),
    #                  ret "scviz_dim_reduction"
    #   html_output -> HTMLResults.generate(fastq_object, tenx_analysis,
    #                  single_cell_experiment, clone_align_fit,
    #                  cell_assignments, scviz_dim_reduction)

    return workflow
Example #10
0
 def test_mapped_scvis(self):
     """Train SCViz on a fixed local sample using its 10x results."""
     sample_dir = "/Users/ceglian/share/MICHELLE_0065_AHGNCGDMXX/Project_06000_EJ/Sample_cDNA_Pre_IGO_06000_EJ_1/"
     project_dir = "/Users/ceglian/project_data/pre_65/"
     sce_rdata = "/Users/ceglian/project_data/pre_65/sce_final.rdata"
     clone_fit_rdata = "/Users/ceglian/project_data/pre_65/clone_align_fit.rdata"

     reads = FastQDirectory(sample_dir, "pre_65_0", project_dir)
     analysis = TenxAnalysis(reads.results)
     SCViz.train(sce_rdata, analysis, clone_fit_rdata)
Example #11
0
 def test_fastqc(self):
     """Run FastQC on a fixed local sample directory."""
     sample_dir = FastQDirectory(
         "/Users/ceglian/input_data/MICHELLE_0065_AHGNCGDMXX/Project_06000_EJ/Sample_cDNA_Pre_IGO_06000_EJ_1/"
     )
     report_dir = "./tests/fastqc_test/"
     FastQC().run(sample_dir, report_dir)
def DownloadFastqs(sampleid, finished):
    """Check that FASTQ files exist for a sample and write a marker file.

    Args:
        sampleid: Sample identifier passed to ``FastQDirectory``.
        finished: Path of the marker file; it receives the text
            ``Completed`` once at least one FASTQ file has been found.

    Raises:
        AssertionError: If ``config.datapath`` holds no ``*.fastq.gz`` file.
    """
    # Instantiated for its side effects only; presumably this is what
    # fetches the files into config.datapath -- TODO confirm.
    FastQDirectory(sampleid, config.prefix, config.jobpath, config.datapath)
    fastqs = glob.glob(os.path.join(config.datapath, "*.fastq.gz"))
    if not fastqs:
        # Raised explicitly so the check is not stripped under ``python -O``;
        # the exception type and message match the former assert.
        raise AssertionError("No Fastqs Download or Found.")
    with open(finished, "w") as marker:
        marker.write("Completed")
Example #13
0
            thresholds = (0.05, 0.01, 0.001)
            import collections
            sig_genes = collections.defaultdict(list)
            for gene, pvalue in sorted_genes:
                for threshold in thresholds:
                    if pvalue < threshold:
                        sig_genes[str(threshold)].append(gene)
            print("**************** Differential Genes ********************")
            for thresh, sig_genes in sig_genes.items():
                print(thresh, len(sig_genes))
            for gene, pvalue in sorted_genes[:100]:
                print(gene, pvalue)
        return sorted_genes


if __name__ == '__main__':
    from itertools import combinations

    # Wrap each hard-coded sample in a FastQDirectory, then run a logistic
    # regression differential expression for every unordered pair of samples.
    samples = ["Y7640", "Y7652", "Y7668", "Y8841"]
    output = "/igo_large/scratch/de_ciara/"
    fastqs = {
        sample: FastQDirectory("/igo_large/data/{}".format(sample),
                               sample, output)
        for sample in samples
    }

    for pair in combinations(samples, 2):
        first, second = pair
        fastq_set = [fastqs[first], fastqs[second]]
        de = DifferentialExpression(pair, fastq_set)
        res = de.logistic_regression()