def RunKallisto(sample_to_path, bus_output):
    # Load the {sampleid: fastq_path} mapping written by the upstream task.
    with open(sample_to_path, "r") as handle:
        sample = json.loads(handle.read())
    sampleid, path = list(sample.items()).pop()
    fastqs = [FastQDirectory(sampleid, config.prefix, config.jobpath, path)]
    krunner = Kallisto(fastqs[0], sampleid)
    bus_path = krunner.count()
    # Record the path of the BUS output so downstream tasks can locate it.
    with open(bus_output, "w") as out:
        out.write(bus_path)
def mkfastq(bcl_object):
    args = dict()
    args["id"] = bcl_object.id
    args["run"] = bcl_object.path
    args["csv"] = bcl_object.csv
    cmd = CellRanger.cmd("mkfastq", args)
    subprocess.call(cmd)
    return FastQDirectory(bcl_object.out())
def main(): sample = "TENX065" fastq = "/data/AHT52JBGXB/" output = "./" fastq_directory = FastQDirectory(fastq, sample, output) krunner = Kallisto(fastq_directory) tenx_path = krunner.count() print(tenx_path)
def set_fastq(self, fastq_directories):
    print(fastq_directories)
    self.fastq_directories = fastq_directories
    self.fastqs = []
    for fastq_directory in self.fastq_directories:
        fastq = FastQDirectory(fastq_directory, self.prefix, self.output,
                               datapath=config.datapath)
        # Only queue FastQ directories that have not already been processed.
        if not fastq.check_status():
            self.fastqs.append(fastq)
    if len(self.fastqs) > 0:
        self.workflow.transform(
            name="{}_cellranger_counts".format(self.prefix),
            func=CellRanger.count,
            args=(self.fastqs,),
        )
    else:
        print("No FastQ to run.")
    return self.fastqs
def RunKallisto(sampleid, finished):
    fastqs = [
        FastQDirectory(sampleid, config.prefix, config.jobpath, config.datapath)
    ]
    krunner = Kallisto(fastqs[0], sampleid)
    tenx_path = krunner.count()
    # tenx = TenxAnalysis(tenx_path)
    # tenxds = TenxDataStorage(sampleid, species=species)
    # tenx.bus_finalize()
    # Sentinel file signalling that the Kallisto run finished.
    with open(finished, "w") as sentinel:
        sentinel.write("Completed")
def main(): sample = "patient2" tenx = TenxDataStorage(sample, version="v2") tenx.download() tenx_analysis = TenxAnalysis(tenx.tenx_path) tenx_analysis.load() output = "/igo_large/scratch/test_kallisto" fastq_directory = FastQDirectory( "/igo_large/scratch/allen/bams/xfastqs2/McGilvery_Sonya__TLH_MissingLibrary_1_CB8R9ANXX/", sample, output) krunner = Kallisto(fastq_directory, tenx_analysis) krunner.de()
def count(fastqs):
    print("Running Cellranger")
    fastqs = [
        FastQDirectory(fastq, config.prefix, config.jobpath, config.datapath)
        for fastq in fastqs
    ]
    args = dict()
    args["id"] = "_".join([fastq.id for fastq in fastqs])
    paths = [fastq.path for fastq in fastqs]
    args["fastqs"] = ",".join(paths)
    try:
        args["sample"] = ",".join([fastq.samples.sampleid[0] for fastq in fastqs])
    except Exception:
        # Sample IDs are not always available; omit the "sample" argument if not.
        pass
    args["transcriptome"] = config.reference
    if config.chemistry is not None:
        args["chemistry"] = config.chemistry
    cmd = CellRanger.cmd("count", args)
    print("Running ", " ".join(cmd))
    subprocess.call(cmd)
def count(fastqs, reference_override=False):
    print("Running Cellranger")
    fastqs = [
        FastQDirectory(fastq, config.prefix, config.jobpath, config.datapath)
        for fastq in fastqs
    ]
    args = dict()
    args["id"] = "_".join([fastq.id for fastq in fastqs])
    paths = [fastq.path for fastq in fastqs]
    args["fastqs"] = ",".join(paths)
    try:
        args["sample"] = ",".join([fastq.samples.sampleid[0] for fastq in fastqs])
    except Exception:
        pass
    # Select the transcriptome and the matching submission script.
    if reference_override:
        args["transcriptome"] = reference_override
        if "mm10" in args["transcriptome"]:
            script = "cellranger_mouse.sh"
            args["id"] += "_mouse"
        else:
            script = "cellranger_human.sh"
    else:
        args["transcriptome"] = config.reference
        script = "cellranger_human.sh"
    if config.chemistry is not None:
        args["chemistry"] = config.chemistry
    cmd = CellRanger.cmd("count", args)
    print("Saving command to submission script ", " ".join(cmd))
    with open(script, "w") as output:
        output.write("source /codebase/cellranger-3.0.2/sourceme.bash\n")
        output.write(" ".join(cmd) + "\n")
    result = subprocess.check_output(["bash", script])
    print("Cellranger output: {}".format(result))
def create_workflow():
    """
    Generates tasks as a Pypeliner workflow based on input arguments.
    The workflow starts from the most raw input provided and overrides any
    downstream tasks with subsequently provided input arguments.
    Parallelization is performed over provided samplesheets and replication
    within samples.

    Args:
        None
    Returns:
        Pypeliner workflow object.
    """
    bcl_directory = args.get("bcl", None)
    fastq_directory = args.get("fastq", None)
    tenx_analysis = args.get("tenx", None)
    rdata = args.get("rdata", None)

    bcl_object = BinaryBaseCall(bcl_directory)

    workflow = pypeliner.workflow.Workflow()

    if bcl_directory:
        workflow.transform(
            name="bcl_to_fastq",
            func=CellRanger.mkfastq,
            ret=pypeliner.managed.TempOutputObj("fastq_object"),
            args=(bcl_object,),
        )

    if bcl_directory is not None or fastq_directory is not None:
        if fastq_directory is not None:
            fastq = FastQDirectory(fastq_directory)
        else:
            fastq = pypeliner.managed.TempInputObj("fastq_object")
        workflow.transform(
            name="fastq_counts",
            func=CellRanger.count,
            ret=pypeliner.managed.TempOutputObj("tenx_analysis"),
            args=(fastq,),
        )

    tenx = None
    if tenx_analysis is not None and rdata is None:
        tenx = TenxAnalysis(tenx_analysis)
    elif tenx_analysis is None and rdata is None:
        tenx = pypeliner.managed.TempInputObj("tenx_analysis")
    if tenx is not None:
        workflow.transform(
            name="tenx_read10xcounts",
            func=TenX.read10xCounts,
            ret=pypeliner.managed.TempOutputObj("single_cell_experiment"),
            args=(tenx,),
        )

    if rdata is not None:
        single_cell_experiment = TenxAnalysis.from_rdata(rdata)
    else:
        single_cell_experiment = pypeliner.managed.TempInputObj("single_cell_experiment")

    # Additional transforms (barcodeRanks, emptyDrops, CellAssign, SCViz, and HTML
    # report generation) are currently disabled.
    workflow.transform(
        name="clonealign",
        func=CloneAlign.run,
        ret=pypeliner.managed.TempOutputObj("clone_align_fit"),
        args=(single_cell_experiment,),
    )

    return workflow
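# A minimal, hypothetical usage sketch (not part of the original module) showing how
# create_workflow() might be handed to a Pypeliner executor. The Pypeline constructor
# arguments and the config keys "tmpdir" and "maxjobs" are assumptions based on
# typical pypeliner usage, not taken from this codebase.
if __name__ == "__main__":
    import pypeliner.app

    # Build the workflow from the module-level `args` dict, then schedule and run it.
    pyp = pypeliner.app.Pypeline(config={"tmpdir": "./pypeliner_tmp", "maxjobs": 4})
    pyp.run(create_workflow())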
def test_mapped_scvis(self):
    fastq = FastQDirectory(
        "/Users/ceglian/share/MICHELLE_0065_AHGNCGDMXX/Project_06000_EJ/Sample_cDNA_Pre_IGO_06000_EJ_1/",
        "pre_65_0",
        "/Users/ceglian/project_data/pre_65/")
    tenx = TenxAnalysis(fastq.results)
    SCViz.train(
        "/Users/ceglian/project_data/pre_65/sce_final.rdata",
        tenx,
        "/Users/ceglian/project_data/pre_65/clone_align_fit.rdata")
def test_fastqc(self):
    output = "./tests/fastqc_test/"
    fastq_path = "/Users/ceglian/input_data/MICHELLE_0065_AHGNCGDMXX/Project_06000_EJ/Sample_cDNA_Pre_IGO_06000_EJ_1/"
    fqobj = FastQDirectory(fastq_path)
    fastqc = FastQC()
    fastqc.run(fqobj, output)
def DownloadFastqs(sampleid, finished):
    # Constructing the FastQDirectory is expected to stage the FastQs under
    # config.datapath; the glob below verifies they are actually present.
    fastqs = [
        FastQDirectory(fastq, config.prefix, config.jobpath, config.datapath)
        for fastq in [sampleid]
    ]
    fastqs = glob.glob(os.path.join(config.datapath, "*.fastq.gz"))
    assert len(fastqs) > 0, "No FastQs downloaded or found."
    with open(finished, "w") as sentinel:
        sentinel.write("Completed")
    thresholds = (0.05, 0.01, 0.001)
    import collections
    sig_genes = collections.defaultdict(list)
    # Bucket genes by every significance threshold their p-value clears.
    for gene, pvalue in sorted_genes:
        for threshold in thresholds:
            if pvalue < threshold:
                sig_genes[str(threshold)].append(gene)
    print("**************** Differential Genes ********************")
    for threshold, genes in sig_genes.items():
        print(threshold, len(genes))
    for gene, pvalue in sorted_genes[:100]:
        print(gene, pvalue)
    return sorted_genes


if __name__ == '__main__':
    from itertools import combinations

    samples = ["Y7640", "Y7652", "Y7668", "Y8841"]
    fastqs = {}
    output = "/igo_large/scratch/de_ciara/"
    for sample in samples:
        fastq_directory = FastQDirectory("/igo_large/data/{}".format(sample), sample, output)
        fastqs[sample] = fastq_directory
    # Run differential expression on every pairwise combination of samples.
    pairwise = combinations(samples, 2)
    for pair in pairwise:
        fastq_set = [fastqs[pair[0]], fastqs[pair[1]]]
        de = DifferentialExpression(pair, fastq_set)
        res = de.logistic_regression()