def RunKallisto(sample_to_path, bus_output):
    # Load the {sampleid: fastq_path} mapping written by the upstream task.
    with open(sample_to_path, "r") as handle:
        sample = json.loads(handle.read())
    sampleid, path = list(sample.items()).pop()
    fastqs = [FastQDirectory(sampleid, config.prefix, config.jobpath, path)]
    krunner = Kallisto(fastqs[0], sampleid)
    bus_path = krunner.count()
    # Record the path of the BUS output so downstream tasks can locate it.
    with open(bus_output, "w") as out:
        out.write(bus_path)
def mkfastq(bcl_object):
    args = dict()
    args["id"] = bcl_object.id
    args["run"] = bcl_object.path
    args["csv"] = bcl_object.csv
    cmd = CellRanger.cmd("mkfastq", args)
    subprocess.call(cmd)
    return FastQDirectory(bcl_object.out())
def main(): sample = "TENX065" fastq = "/data/AHT52JBGXB/" output = "./" fastq_directory = FastQDirectory(fastq, sample, output) krunner = Kallisto(fastq_directory) tenx_path = krunner.count() print(tenx_path)
def set_fastq(self, fastq_directories):
    print(fastq_directories)
    self.fastq_directories = fastq_directories
    self.fastqs = []
    for fastq_directory in self.fastq_directories:
        fastq = FastQDirectory(fastq_directory, self.prefix, self.output,
                               datapath=config.datapath)
        # Only queue FastQ directories that have not already been processed.
        if not fastq.check_status():
            self.fastqs.append(fastq)
    if len(self.fastqs) > 0:
        self.workflow.transform(
            name="{}_cellranger_counts".format(self.prefix),
            func=CellRanger.count,
            args=(self.fastqs,),
        )
    else:
        print("No FastQ to run.")
    return self.fastqs
def RunKallisto(sampleid, finished):
    fastqs = [
        FastQDirectory(sampleid, config.prefix, config.jobpath, config.datapath)
    ]
    krunner = Kallisto(fastqs[0], sampleid)
    tenx_path = krunner.count()
    # tenx = TenxAnalysis(tenx_path)
    # tenxds = TenxDataStorage(sampleid, species=species)
    # tenx.bus_finalize()
    # Sentinel file signalling that the Kallisto run finished.
    with open(finished, "w") as sentinel:
        sentinel.write("Completed")
def main(): sample = "patient2" tenx = TenxDataStorage(sample, version="v2") tenx.download() tenx_analysis = TenxAnalysis(tenx.tenx_path) tenx_analysis.load() output = "/igo_large/scratch/test_kallisto" fastq_directory = FastQDirectory( "/igo_large/scratch/allen/bams/xfastqs2/McGilvery_Sonya__TLH_MissingLibrary_1_CB8R9ANXX/", sample, output) krunner = Kallisto(fastq_directory, tenx_analysis) krunner.de()
def count(fastqs):
    print("Running Cellranger")
    fastqs = [
        FastQDirectory(fastq, config.prefix, config.jobpath, config.datapath)
        for fastq in fastqs
    ]
    args = dict()
    args["id"] = "_".join([fastq.id for fastq in fastqs])
    paths = [fastq.path for fastq in fastqs]
    args["fastqs"] = ",".join(paths)
    try:
        args["sample"] = ",".join([fastq.samples.sampleid[0] for fastq in fastqs])
    except Exception:
        # Sample IDs are not always available; omit the "sample" argument if not.
        pass
    args["transcriptome"] = config.reference
    if config.chemistry is not None:
        args["chemistry"] = config.chemistry
    cmd = CellRanger.cmd("count", args)
    print("Running ", " ".join(cmd))
    subprocess.call(cmd)
def count(fastqs, reference_override=False):
    print("Running Cellranger")
    fastqs = [
        FastQDirectory(fastq, config.prefix, config.jobpath, config.datapath)
        for fastq in fastqs
    ]
    args = dict()
    args["id"] = "_".join([fastq.id for fastq in fastqs])
    paths = [fastq.path for fastq in fastqs]
    args["fastqs"] = ",".join(paths)
    try:
        args["sample"] = ",".join([fastq.samples.sampleid[0] for fastq in fastqs])
    except Exception:
        pass
    # Select the transcriptome and the matching submission script.
    if reference_override:
        args["transcriptome"] = reference_override
        if "mm10" in args["transcriptome"]:
            script = "cellranger_mouse.sh"
            args["id"] += "_mouse"
        else:
            script = "cellranger_human.sh"
    else:
        args["transcriptome"] = config.reference
        script = "cellranger_human.sh"
    if config.chemistry is not None:
        args["chemistry"] = config.chemistry
    cmd = CellRanger.cmd("count", args)
    print("Saving command to submission script ", " ".join(cmd))
    with open(script, "w") as output:
        output.write("source /codebase/cellranger-3.0.2/sourceme.bash\n")
        output.write(" ".join(cmd) + "\n")
    result = subprocess.check_output(["bash", script])
    print("Cellranger output: {}".format(result))
def create_workflow():
    """
    Generates tasks as a Pypeliner workflow based on input arguments.
    The workflow starts from the most raw input provided and overrides any
    downstream tasks with subsequently provided input arguments.
    Parallelization is performed over provided samplesheets and replication
    within samples.

    Args:
        None
    Returns:
        Pypeliner workflow object.
    """
    bcl_directory = args.get("bcl", None)
    fastq_directory = args.get("fastq", None)
    tenx_analysis = args.get("tenx", None)
    rdata = args.get("rdata", None)

    bcl_object = BinaryBaseCall(bcl_directory)

    workflow = pypeliner.workflow.Workflow()

    if bcl_directory:
        workflow.transform(
            name="bcl_to_fastq",
            func=CellRanger.mkfastq,
            ret=pypeliner.managed.TempOutputObj("fastq_object"),
            args=(bcl_object,),
        )

    if bcl_directory is not None or fastq_directory is not None:
        if fastq_directory is not None:
            fastq = FastQDirectory(fastq_directory)
        else:
            fastq = pypeliner.managed.TempInputObj("fastq_object")
        workflow.transform(
            name="fastq_counts",
            func=CellRanger.count,
            ret=pypeliner.managed.TempOutputObj("tenx_analysis"),
            args=(fastq,),
        )

    tenx = None
    if tenx_analysis is not None and rdata is None:
        tenx = TenxAnalysis(tenx_analysis)
    elif tenx_analysis is None and rdata is None:
        tenx = pypeliner.managed.TempInputObj("tenx_analysis")
    if tenx is not None:
        workflow.transform(
            name="tenx_read10xcounts",
            func=TenX.read10xCounts,
            ret=pypeliner.managed.TempOutputObj("single_cell_experiment"),
            args=(tenx,),
        )

    if rdata is not None:
        single_cell_experiment = TenxAnalysis.from_rdata(rdata)
    else:
        single_cell_experiment = pypeliner.managed.TempInputObj("single_cell_experiment")

    # Additional transforms (barcodeRanks, emptyDrops, CellAssign, SCViz, and HTML
    # report generation) are currently disabled.
    workflow.transform(
        name="clonealign",
        func=CloneAlign.run,
        ret=pypeliner.managed.TempOutputObj("clone_align_fit"),
        args=(single_cell_experiment,),
    )

    return workflow
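# A minimal, hypothetical usage sketch (not part of the original module) showing how
# create_workflow() might be handed to a Pypeliner executor. The Pypeline constructor
# arguments and the config keys "tmpdir" and "maxjobs" are assumptions based on
# typical pypeliner usage, not taken from this codebase.
if __name__ == "__main__":
    import pypeliner.app

    # Build the workflow from the module-level `args` dict, then schedule and run it.
    pyp = pypeliner.app.Pypeline(config={"tmpdir": "./pypeliner_tmp", "maxjobs": 4})
    pyp.run(create_workflow())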
def test_mapped_scvis(self):
    fastq = FastQDirectory(
        "/Users/ceglian/share/MICHELLE_0065_AHGNCGDMXX/Project_06000_EJ/Sample_cDNA_Pre_IGO_06000_EJ_1/",
        "pre_65_0",
        "/Users/ceglian/project_data/pre_65/")
    tenx = TenxAnalysis(fastq.results)
    SCViz.train(
        "/Users/ceglian/project_data/pre_65/sce_final.rdata",
        tenx,
        "/Users/ceglian/project_data/pre_65/clone_align_fit.rdata")
def test_fastqc(self):
    output = "./tests/fastqc_test/"
    fastq_path = "/Users/ceglian/input_data/MICHELLE_0065_AHGNCGDMXX/Project_06000_EJ/Sample_cDNA_Pre_IGO_06000_EJ_1/"
    fqobj = FastQDirectory(fastq_path)
    fastqc = FastQC()
    fastqc.run(fqobj, output)
def DownloadFastqs(sampleid, finished):
    # Constructing the FastQDirectory is expected to stage the FastQs under
    # config.datapath; the glob below verifies they are actually present.
    fastqs = [
        FastQDirectory(fastq, config.prefix, config.jobpath, config.datapath)
        for fastq in [sampleid]
    ]
    fastqs = glob.glob(os.path.join(config.datapath, "*.fastq.gz"))
    assert len(fastqs) > 0, "No FastQs downloaded or found."
    with open(finished, "w") as sentinel:
        sentinel.write("Completed")
    thresholds = (0.05, 0.01, 0.001)
    import collections
    sig_genes = collections.defaultdict(list)
    # Bucket genes by every significance threshold their p-value clears.
    for gene, pvalue in sorted_genes:
        for threshold in thresholds:
            if pvalue < threshold:
                sig_genes[str(threshold)].append(gene)
    print("**************** Differential Genes ********************")
    for threshold, genes in sig_genes.items():
        print(threshold, len(genes))
    for gene, pvalue in sorted_genes[:100]:
        print(gene, pvalue)
    return sorted_genes


if __name__ == '__main__':
    from itertools import combinations

    samples = ["Y7640", "Y7652", "Y7668", "Y8841"]
    fastqs = {}
    output = "/igo_large/scratch/de_ciara/"
    for sample in samples:
        fastq_directory = FastQDirectory("/igo_large/data/{}".format(sample), sample, output)
        fastqs[sample] = fastq_directory
    # Run differential expression on every pairwise combination of samples.
    pairwise = combinations(samples, 2)
    for pair in pairwise:
        fastq_set = [fastqs[pair[0]], fastqs[pair[1]]]
        de = DifferentialExpression(pair, fastq_set)
        res = de.logistic_regression()