def run_ballgown(self):
    """Run the ballgown differential-expression task (eukarya only)."""
    bg_task = ballgown.ballgown(workdir=self.workdir,
                                kingdom="eukarya",
                                exp_design=self.exp_desn_file,
                                p_value=self.p_value)
    build([bg_task], local_scheduler=self.local_scheduler, workers=1)
def merge_stringtie(self, new_gff):
    """Merge per-sample StringTie assemblies using *new_gff* as reference."""
    merge_task = stringtie.MergeStringTies(fastq_dic=self.fastq_dic,
                                           num_cpus=self.num_cpus,
                                           workdir=self.workdir,
                                           gff_file=new_gff,
                                           kingdom=self.kingdom)
    build([merge_task], local_scheduler=self.local_scheduler, workers=1)
def run_deseq2(self):
    """Run DESeq2 for both the prokaryotic and eukaryotic references.

    One task per kingdom; the reference FASTA index and the updated GFF
    name are the only per-kingdom differences.
    """
    dge_tasks = []
    for ref_idx, kd, gff_name in ((0, 'prokarya', "prok_updated.gff"),
                                  (1, 'eukarya', "euk_updated.gff")):
        dge_tasks.append(
            DGE.DESeq2(fastq_dic=self.fastq_dic,
                       num_cpus=self.num_cpus,
                       indexfile=self.hisat_index,
                       bindir=self.bindir,
                       workdir=self.workdir,
                       ref_file=self.ref_fastas.split(",")[ref_idx],
                       kingdom=kd,
                       gff=os.path.join(self.workdir, gff_name),
                       exp_design=self.exp_desn_file,
                       p_value=self.p_value))
    build(dge_tasks, local_scheduler=self.local_scheduler,
          workers=self.no_of_jobs)
def test_restringtie():
    """Test restringtie approach."""
    fq_dic = {'samp1': "l",
              'samp2': ['b'],
              'samp3': ['b'],
              'samp4': ['b'],
              'samp5': ['b'],
              'samp6': ['tests/data/fastqs/BTT_test27_R1.fastq.gz',
                        "tests/data/fastqs/BTT_test27_R2.fastq.gz"]}
    work_dir = os.path.join("tests", "data", "test_prok")
    build([stringtie.ReStringTieScoresW(fastq_dic=fq_dic,
                                        num_cpus=2,
                                        workdir=work_dir,
                                        kingdom="prokarya")],
          local_scheduler=True)
    ctab = os.path.join("tests", "data", "test_prok", "processes",
                        "ballgown", "prokarya", "samp6", "e_data.ctab")
    assert os.stat(ctab).st_size > 1
def NovelRegions(self):
    """Find novel regions."""
    combined_gffs = self.prok_gff + "," + self.euk_gff
    novel_task = srna.FindNovelRegionsW(fastq_dic=self.fastq_dic,
                                        workdir=self.workdir,
                                        kingdom=self.kingdom,
                                        gff_file=combined_gffs)
    build([novel_task], local_scheduler=self.local_scheduler)
def run_opaver(self, method):
    """Run the opaver task for the given DGE *method*."""
    opaver_task = RunOpaver(workdir=self.workdir,
                            kingdom=self.kingdom,
                            method=method)
    build([opaver_task], local_scheduler=self.local_scheduler, workers=1)
def test_featurecount():
    """Test star index creation and mapping."""
    map_dir = os.path.join("tests/test_count", "processes", "mapping",
                           "samp5")
    if not os.path.exists(map_dir):
        os.makedirs(map_dir)
    # Stage the pre-made sorted BAM into the scratch work directory.
    cp[["tests/data/test_prok/processes/mapping/samp5/samp5_srt.bam",
        map_dir]]()
    count_task = fc.FeatureCounts(fastq_dic={'samp5': ''},
                                  kingdom="prokarya",
                                  gff_file="tests/data/test_prok.gff",
                                  workdir="tests/test_count",
                                  indexfile="",
                                  num_cpus=2,
                                  ref_file="tests/data/test_prok.fna",
                                  fid="ID",
                                  stranded=0)
    build([count_task], local_scheduler=True)
    assert os.path.exists(
        "tests/test_count/processes/featureCounts/prokarya/gene_count.tsv"
    ) is True
    # Clean up the scratch directory.
    rm["-rf", "tests/test_count"]()
def run_emapper(self, new_gff, kingdom, fasta):
    """Run the emapper task on the given GFF/FASTA pair for *kingdom*."""
    emapper_task = function.RunEmapper(workdir=self.workdir,
                                       gff_file=new_gff,
                                       fasta_file=fasta,
                                       kingdom=kingdom,
                                       emapper_dir=self.emap_dir)
    build([emapper_task], local_scheduler=self.local_scheduler, workers=1)
def test_CreateReadSketches():
    """Test if this first luigi class works test for creating sketches."""
    qc_task = faqcs.RunAllQC(in_folder="tests/data/fqs",
                             out_folder="tests/sk_test",
                             num_cpus=1,
                             faqc_min_L=50,
                             n_cutoff=4)
    sketch_task = sketch.AllSketches(in_folder="tests/data/fqs",
                                     kmer=31,
                                     threads=2,
                                     sketch=100,
                                     seed=2500,
                                     min_copy=2,
                                     out_folder="tests/sk_test",
                                     mash_tool="mash")
    build([qc_task, sketch_task], local_scheduler=True, workers=1)
    # Record existence before removing the scratch dir, then assert.
    file_exist = os.path.exists("tests/sk_test/.mash/SRR059451_.msh")
    shutil.rmtree("tests/sk_test")
    assert file_exist is True
def create_db(self):
    """Create the aligner index plus a samtools FASTA index.

    The prokaryotic and eukaryotic reference FASTAs are joined with a
    comma and handed to the indexer selected by ``self.aligner``
    (``"hisat2"`` or ``"STAR"``/``"star"``); any other value is a no-op.
    """
    # Both branches index the same combined reference, so build it once.
    ref_fastas = self.prok_fasta + "," + self.euk_fasta
    if self.aligner == "hisat2":
        build([hisat2.HisatIndex(fasta=ref_fastas,
                                 hi_index=self.hisat_index,
                                 num_cpus=self.num_cpus),
               Map.SAMindex(fasta=ref_fastas, workdir=self.workdir)],
              local_scheduler=self.local_scheduler, workers=1)
    elif self.aligner in ["STAR", "star"]:
        build([star.STARindex(fasta=ref_fastas,
                              num_cpus=self.num_cpus,
                              gff_file=self.euk_gff,
                              stardb_dir=self.stardb_dir,
                              kingdom=self.kingdom),
               Map.SAMindex(fasta=ref_fastas, workdir=self.workdir)],
              local_scheduler=self.local_scheduler)
def restringtie(self):
    """Re-run StringTie scoring for this kingdom."""
    score_task = stringtie.ReStringTieScoresW(fastq_dic=self.fastq_dic,
                                              num_cpus=self.num_cpus,
                                              workdir=self.workdir,
                                              kingdom=self.kingdom)
    build([score_task], local_scheduler=self.local_scheduler, workers=1)
def create_new_gff(self):
    """Compile results into an updated GFF file."""
    gff_task = srna.CompileGFF(fastq_dic=self.fastq_dic,
                               kingdom=self.kingdom,
                               workdir=self.workdir,
                               gff_file=self.gff_file)
    build([gff_task], local_scheduler=self.local_scheduler, workers=1)
def summ_json(self, new_gff, method, NovelRegions, kingdom, fasta):
    """Summarize results to JSON via the conver2json task."""
    json_task = summarize.conver2json(gff_file=new_gff,
                                      fasta_file=fasta,
                                      pathway=self.pathway,
                                      workdir=self.workdir,
                                      kingdom=kingdom,
                                      method=method,
                                      NovelRegions=NovelRegions)
    build([json_task], local_scheduler=self.local_scheduler, workers=1)
def feature_counts(self, new_gff, kingdom):
    """Run featureCounts against *new_gff* for the given *kingdom*."""
    combined_refs = self.prok_fasta + "," + self.euk_fasta
    count_task = featurecounts.FeatureCounts(fastq_dic=self.fastq_dic,
                                             num_cpus=self.num_cpus,
                                             gff_file=new_gff,
                                             indexfile=self.hisat_index,
                                             kingdom=kingdom,
                                             workdir=self.workdir,
                                             ref_file=combined_refs)
    build([count_task], local_scheduler=self.local_scheduler, workers=1)
def run_emapper(self, new_gff):
    """Run the emapper task on *new_gff* for this instance's kingdom."""
    emapper_task = RunEmapper(workdir=self.workdir,
                              gff_file=new_gff,
                              fasta_file=self.ref_fasta,
                              kingdom=self.kingdom,
                              emapper_dir=self.emap_dir)
    build([emapper_task], local_scheduler=self.local_scheduler, workers=1)
def extract_pp(self):
    """Extract properly paired reads."""
    pp_task = srna.ExtractPPW(fastq_dic=self.fastq_dic,
                              num_cpus=self.num_cpus,
                              indexfile=self.hisat_index,
                              workdir=self.workdir,
                              kingdom=self.kingdom)
    build([pp_task], local_scheduler=self.local_scheduler)
def split_prokeuk(self):
    """Split mapped reads into prokaryotic and eukaryotic sets."""
    split_task = Map.Split2ProkEukW(fastq_dic=self.fastq_dic,
                                    num_cpus=self.num_cpus,
                                    indexfile=self.hisat_index,
                                    workdir=self.workdir,
                                    ref_file=self.ref_fastas)
    build([split_task], local_scheduler=self.local_scheduler,
          workers=self.no_of_jobs)
def map_hisat(self):
    """Map reads against the HISAT2 index."""
    map_task = Map.HisatMapW(fastq_dic=self.fastq_dic,
                             num_cpus=self.num_cpus,
                             indexfile=self.hisat_index,
                             workdir=self.workdir,
                             ref_file=self.ref_fastas)
    build([map_task], local_scheduler=self.local_scheduler,
          workers=self.no_of_jobs)
def map_reads(self):
    """Map reads with the configured aligner (hisat2 or STAR)."""
    if self.aligner == "hisat2":
        mapper = hisat2.HisatMapW(fastq_dic=self.fastq_dic,
                                  num_cpus=self.num_cpus,
                                  indexfile=self.hisat_index,
                                  workdir=self.workdir,
                                  kingdom=self.kingdom)
        build([mapper], local_scheduler=self.local_scheduler)
    elif self.aligner in ["STAR", "star"]:
        mapper = star.map_starW(fastq_dic=self.fastq_dic,
                                num_cpus=self.num_cpus,
                                stardb_dir=self.stardb_dir,
                                workdir=self.workdir)
        build([mapper], local_scheduler=self.local_scheduler)
def summarize_map(self):
    """Summarize mapping results across samples."""
    summary_task = srna.SummarizeMap(fastq_dic=self.fastq_dic,
                                     num_cpus=self.num_cpus,
                                     indexfile=self.hisat_index,
                                     workdir=self.workdir,
                                     ref_file=self.ref_fastas,
                                     kingdom=self.kingdom)
    build([summary_task], local_scheduler=self.local_scheduler,
          workers=self.no_of_jobs)
def find_NovelRegions(self):
    """Find novel regions relative to the reference annotation."""
    novel_task = srna.FindNovelRegionsW(fastq_dic=self.fastq_dic,
                                        num_cpus=self.num_cpus,
                                        indexfile=self.hisat_index,
                                        kingdom=self.kingdom,
                                        workdir=self.workdir,
                                        ref_file=self.ref_fasta,
                                        gff_file=self.gff_file)
    build([novel_task], local_scheduler=self.local_scheduler, workers=1)
def restringtie(self):
    """Re-run StringTie scoring using the reference GFFs."""
    score_task = Summ.ReStringTieScoresW(fastq_dic=self.fastq_dic,
                                         num_cpus=self.num_cpus,
                                         indexfile=self.hisat_index,
                                         workdir=self.workdir,
                                         ref_file=self.ref_fastas,
                                         gff_file=self.ref_gffs,
                                         kingdom=self.kingdom)
    build([score_task], local_scheduler=self.local_scheduler,
          workers=self.no_of_jobs)
def map_hisat_summarize(self):
    """Summarize HISAT2 mapping results.

    Uses a single luigi worker (the original assigned a throwaway local
    ``no_of_jobs = 1`` only to pass it straight through; the constant is
    now inlined).
    """
    summary_task = srna.SummarizeHisatMap(fastq_dic=self.fastq_dic,
                                          num_cpus=self.num_cpus,
                                          indexfile=self.hisat_index,
                                          workdir=self.workdir,
                                          ref_file=self.ref_fasta,
                                          kingdom=self.kingdom)
    build([summary_task], local_scheduler=self.local_scheduler, workers=1)
def merge_stringties(self):
    """Merge StringTie assemblies for both kingdoms."""
    merge_task = Summ.MergeStringTies(fastq_dic=self.fastq_dic,
                                      num_cpus=self.num_cpus,
                                      indexfile=self.hisat_index,
                                      workdir=self.workdir,
                                      ref_file=self.ref_fastas,
                                      gff_file=self.ref_gffs,
                                      kingdom="both")
    build([merge_task], local_scheduler=self.local_scheduler,
          workers=self.no_of_jobs)
def feature_count_updated(self, new_gff):
    """Run featureCounts against the updated GFF *new_gff*."""
    count_task = featurecounts.FeatureCounts(fastq_dic=self.fastq_dic,
                                             num_cpus=self.num_cpus,
                                             gff_file=new_gff,
                                             indexfile=self.hisat_index,
                                             kingdom=self.kingdom,
                                             workdir=self.workdir,
                                             ref_file=self.ref_fasta)
    build([count_task], local_scheduler=self.local_scheduler, workers=1)
def compile_gff(self):
    """Compile GFF output from the reference annotations."""
    gff_task = srna.CompileGFF(fastq_dic=self.fastq_dic,
                               num_cpus=self.num_cpus,
                               indexfile=self.hisat_index,
                               kingdom=self.kingdom,
                               workdir=self.workdir,
                               ref_file=self.ref_fastas,
                               gff_file=self.ref_gffs)
    build([gff_task], local_scheduler=self.local_scheduler,
          workers=self.no_of_jobs)
def novel_regions(self):
    """Find novel regions using the reference FASTAs and GFFs."""
    novel_task = srna.FindNovelRegionsW(fastq_dic=self.fastq_dic,
                                        num_cpus=self.num_cpus,
                                        indexfile=self.hisat_index,
                                        workdir=self.workdir,
                                        ref_file=self.ref_fastas,
                                        gff_file=self.ref_gffs,
                                        kingdom=self.kingdom)
    build([novel_task], local_scheduler=self.local_scheduler,
          workers=self.no_of_jobs)
def run_stringtie(self):
    """Score transcripts with StringTie using the updated GFF."""
    updated_gff = os.path.join(self.workdir, "updated.gff")
    score_task = Map.StringTieScoresW(fastq_dic=self.fastq_dic,
                                      num_cpus=self.num_cpus,
                                      indexfile=self.hisat_index,
                                      workdir=self.workdir,
                                      ref_file=self.ref_fasta,
                                      gff_file=updated_gff,
                                      kingdom=self.kingdom)
    build([score_task], local_scheduler=self.local_scheduler, workers=1)
def test_opaver():
    """Test opaver run on the prokaryotic edgeR results.

    (The original docstring said "Test deseq3.", which described neither
    the task under test nor any real tool name.)
    """
    build([opaver.RunOpaver(workdir=os.path.join("tests", "data",
                                                 "test_prok"),
                            kingdom="prokarya",
                            method="edgeR")],
          local_scheduler=True)
    sig_csv = os.path.join("tests", "data", "test_prok", "processes",
                           "DESeq2", "prokarya", "gene",
                           "liver__over__spleen__gene__sig.csv")
    assert os.stat(sig_csv).st_size > 1
def run_deseq2(self):
    """Run the DESeq2 differential-expression task.

    (Commented-out kwargs ``gff_file``, ``pathway``, and ``GAGE`` from
    the original were dead code and have been removed.)
    """
    dge_task = DESeq2.DESeq2(workdir=self.workdir,
                             kingdom=self.kingdom,
                             exp_design=self.exp_desn_file,
                             p_value=self.p_value)
    build([dge_task], local_scheduler=self.local_scheduler, workers=1)