def run_test(
    basic: Basic,
    code: str,
    name: str,
    res_type: type_res = None,
    err_type: type_err = None,
    value: type_value = None,
    repl: bool = True,
) -> None:
    if not isinstance(res_type, list):
        res_type = [res_type] if res_type else []
    if not isinstance(value, list):
        value = [value] if value is not None else []
    try:
        results = basic.run(code, name, repl)
        assert len(results) == len(res_type)
        for i, res in enumerate(results):
            assert res_type and isinstance(res, res_type[i])
            if value and value[i] is not Ellipsis:
                if res.value != value[i]:
                    print(res.value, value[i])
                assert res.value == value[i]
    except Error as err:
        assert err_type and isinstance(err, err_type)
    else:
        assert err_type is None
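# Hedged usage sketch for run_test: Number and DivisionByZeroError are
# placeholder names for this interpreter's result/error classes (assumptions,
# not taken from the repo); Basic, Error and run_test are the ones above.
def _example_tests():
    basic = Basic()
    # one statement, one expected result of the expected type and value
    run_test(basic, "1 + 2", "<test>", res_type=Number, value=3)
    # Ellipsis skips the value check but still enforces the result type
    run_test(basic, "10 * 10", "<test>", res_type=Number, value=...)
    # expected failure: err_type is set, so basic.run() must raise that Error subclass
    run_test(basic, "1 / 0", "<test>", err_type=DivisionByZeroError)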
def main(debug: bool = False) -> None:
    basic = Basic()
    while True:
        text = input("basic > ")
        try:
            results = basic.run(text, "<stdin>", True, debug)
            for result in results:
                print(result)
        except Error as err:
            print_error(err)
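# Hedged entry-point sketch: how this REPL main() is typically wired up.
# The "--debug" flag name is an illustrative assumption, not taken from the repo.
if __name__ == "__main__":
    import sys
    main(debug="--debug" in sys.argv[1:])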
def main(file_name: str) -> None:
    with open(file_name, "r") as f:
        code = f.read()
    basic = Basic()
    try:
        results = basic.run(code, file_name)
        for result in results:
            print(result)
    except Error as err:
        print_error(err)
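# Hedged entry-point sketch for the file runner; the usage string and script
# name are illustrative assumptions.
if __name__ == "__main__":
    import sys
    if len(sys.argv) != 2:
        sys.exit("usage: python <script> <source-file>")
    main(sys.argv[1])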
def submit(self, number=None, mem="15G"):
    """
    number: which jobs to submit: "all" or "last_one"
    """
    with open(self.sample_list_file, "r") as handle:
        samples = handle.readlines()
    if number == "last_one":
        last_one = samples[-1].split("\t")[0]
        last_one_pbs = os.path.join(self.pbs_dir, last_one + ".pbs")
        cmd = "qsub -l nodes=1:ppn=%s -l mem=%s %s" % (self.threads, mem, last_one_pbs)
        p = Basic.run(cmd, wkdir=self.pbs_dir)
        print(p.stdout.read())
    if number == "all":
        for one_sample in samples:
            one = one_sample.split("\t")[0]
            one_pbs = os.path.join(self.pbs_dir, one + ".pbs")
            cmd = "qsub -l nodes=1:ppn=%s -l mem=%s %s" % (self.threads, mem, one_pbs)
            p = Basic.run(cmd, wkdir=self.pbs_dir)
            print(p.stdout.read())
    print("finished submitting PBS jobs!")
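# Hedged usage sketch for submit(): `pipeline` stands in for an instance of the
# surrounding class (its real name is not shown here); submitting one job first
# and then the full batch is only a suggested smoke-test pattern, not repo code.
def submit_with_check(pipeline, mem="15G"):
    pipeline.submit(number="last_one", mem=mem)   # try the last sample first
    input("check the job with qstat, then press Enter to submit the rest...")
    pipeline.submit(number="all", mem=mem)        # submit every sample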
def process(self, sampleID=None, fastq_1=None, fastq_2=None):
    oneSample_dir = os.path.join(self.bam_dir, sampleID)
    if not os.path.exists(oneSample_dir):
        os.makedirs(oneSample_dir)
    log_file = os.path.join(oneSample_dir, sampleID + ".log.txt")

    # step1. bwa mapping to the whole genome
    sort_bam = os.path.join(oneSample_dir, sampleID + ".sort.bam")
    cmd = """%s mem -R '@RG\\tID:GRCH37\\tSM:%s\\tLB:\\tPL:ILLUMINA' -t %s %s %s %s|%s view -@ %s -Shu -|%s sort -@ %s -o %s - > %s;\n""" % \
        (self.bwa, sampleID, self.threads, self.whole_genome, fastq_1, fastq_2,
         self.samtools, self.threads, self.samtools, self.threads, sort_bam, log_file)
    Basic.run(cmd=cmd, wkdir=self.prefix)

    # step1.1 index the sorted bam file
    cmd = """%s index %s;\n""" % (self.samtools, sort_bam)
    Basic.run(cmd=cmd, wkdir=self.prefix)

    # step2. extract reads mapped to the mitochondrial genome (chrM)
    MT_bam = os.path.join(oneSample_dir, sampleID + ".MT.bam")
    cmd = "%s view -bh %s chrM -o %s;\n" % (self.samtools, sort_bam, MT_bam)
    Basic.run(cmd=cmd, wkdir=self.prefix)

    # step3. obtain uniquely mapped reads (drop reads carrying XA:Z:/SA:Z: tags)
    MT_bam_unique = os.path.join(oneSample_dir, sampleID + ".unique.MT.bam")
    MT_bam_noneUnique = os.path.join(oneSample_dir, sampleID + ".noneUnique.MT.bam")
    cmd = "%s view -h %s | grep -v -e 'XA:Z:' -e 'SA:Z:' | samtools view -b > %s;" % \
        (self.samtools, MT_bam, MT_bam_unique)
    Basic.run(cmd=cmd, wkdir=self.prefix)
    cmd = "%s view -h %s | grep -e 'XA:Z:' -e 'SA:Z:' | samtools view -b > %s;" % \
        (self.samtools, MT_bam, MT_bam_noneUnique)
    Basic.run(cmd=cmd, wkdir=self.prefix)

    # step4. sort and mpileup
    p_sort_bam = os.path.join(oneSample_dir, sampleID + ".p.sort.bam")
    cmd = "%s sort -@ 7 -o %s %s;\n" % (self.samtools, p_sort_bam, MT_bam_unique)
    Basic.run(cmd=cmd, wkdir=self.prefix)
    mpile_file = os.path.join(self.mpile_dir, sampleID + ".mpile.file")
    cmd = "%s mpileup -B -Q 30 -d 1000000 -L 10000 -f %s %s > %s;" % (
        self.samtools, self.ref_genome, p_sort_bam, mpile_file)
    Basic.run(cmd=cmd, wkdir=self.prefix)

    # step5. call heteroplasmy
    heteroplasmy_raw = os.path.join(oneSample_dir, sampleID + ".mp.raw")
    cmd = '%s -i %s -o %s;\n' % (self.het_raw, mpile_file, heteroplasmy_raw)
    Basic.run(cmd=cmd, wkdir=self.prefix)
    heteroplasmy_txt = os.path.join(self.mpile_dir, sampleID + ".heteroplasmy.txt")
    cmd = 'python2 %s --loose %s --chi %s -d %s --mle %s -i %s -o %s' \
        % (self.het_filter, "0.003,0.003", "0.0", "1000", "0", heteroplasmy_raw, heteroplasmy_txt)
    Basic.run(cmd=cmd, wkdir=self.prefix)
    print("finished detecting heteroplasmy!")
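# Hedged driver sketch for process(): assumes sample_list_file is tab-separated
# as <sampleID>\t<fastq_1>\t<fastq_2> (the column layout is an assumption) and
# that `pipeline` is an instance of the surrounding class.
def run_all_samples(pipeline, sample_list_file):
    with open(sample_list_file) as handle:
        for line in handle:
            fields = line.rstrip("\n").split("\t")
            if len(fields) < 3:
                continue  # skip malformed or incomplete lines
            sample_id, fq1, fq2 = fields[:3]
            pipeline.process(sampleID=sample_id, fastq_1=fq1, fastq_2=fq2)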
def new_process(self, sampleID=None, fastq_1=None, fastq_2=None):
    oneSample_dir = os.path.join(self.bam_dir, sampleID)
    if not os.path.exists(oneSample_dir):
        os.makedirs(oneSample_dir)
    log_file = os.path.join(oneSample_dir, sampleID + ".log.txt")

    # step1. bwa mapping to the whole genome
    sort_bam = os.path.join(oneSample_dir, sampleID + ".sort.bam")
    cmd = """%s mem -R '@RG\\tID:GRCH37\\tSM:%s\\tLB:mitochondria\\tPL:ILLUMINA' -t %s %s %s %s|%s view -@ %s -Shu -|%s sort -@ %s -o %s - > %s;\n""" % \
        (self.bwa, sampleID, self.threads, self.whole_genome, fastq_1, fastq_2,
         self.samtools, self.threads, self.samtools, self.threads, sort_bam, log_file)
    Basic.run(cmd=cmd, wkdir=self.prefix)
    cmd = """%s index %s;\n""" % (self.samtools, sort_bam)
    Basic.run(cmd=cmd, wkdir=self.prefix)

    # step2. extract the mitochondrial reads from this sample's sorted bam
    bamio = bamIO(bam_file=sort_bam)
    MTbam = bamio.run()
    cmd = """%s index %s;\n""" % (self.samtools, MTbam)
    Basic.run(cmd=cmd, wkdir=self.prefix)

    # step3. mark duplicates
    marked_duplicated_bam = os.path.join(oneSample_dir, sampleID + ".mkdup.bam")
    mkmatrix = os.path.join(oneSample_dir, sampleID + ".mkdup.matrix.txt")
    cmd = self.markduplicate(input_bam=MTbam,
                             marked_duplicated_bam=marked_duplicated_bam,
                             marked_dup_metrics_txt=mkmatrix)
    Basic.run(cmd=cmd, wkdir=self.prefix)
    bamio.sample_meta_infor["befor_mkduped_read"] = bamio.mapped_read(input_bam=MTbam)
    cmd = """%s index %s;\n""" % (self.samtools, marked_duplicated_bam)
    Basic.run(cmd=cmd, wkdir=self.prefix)
    bamio.sample_meta_infor["after_mkduped_read"] = bamio.mapped_read(input_bam=marked_duplicated_bam)

    # step4. local realignment
    localRealign_bam = os.path.join(oneSample_dir, sampleID + ".localRealign.bam")
    cmd = self.localRealignment(input_bam=marked_duplicated_bam, output_bam=localRealign_bam)
    Basic.run(cmd=cmd, wkdir=self.prefix)
    cmd = """%s index %s;\n""" % (self.samtools, localRealign_bam)
    Basic.run(cmd=cmd, wkdir=self.prefix)
    bamio.sample_meta_infor["after_local_realign"] = bamio.mapped_read(input_bam=localRealign_bam)

    # step5. mismatch filter
    mismatch_bam = os.path.join(oneSample_dir, sampleID + ".mismatchFilter.bam")
    bamio.limit_mismatch(input_bam=localRealign_bam, output_bam=mismatch_bam, mismatch_num=1)
    cmd = """%s index %s;\n""" % (self.samtools, mismatch_bam)
    Basic.run(cmd=cmd, wkdir=self.prefix)

    # step6. keep uniquely mapped reads, sort and mpileup
    p_sort_bam = os.path.join(oneSample_dir, sampleID + ".p.sort.bam")
    MT_bam_unique = os.path.join(oneSample_dir, sampleID + ".unique.MT.bam")
    cmd = "%s view -h %s | grep -v -e 'XA:Z:' -e 'SA:Z:' | samtools view -b > %s;" % \
        (self.samtools, mismatch_bam, MT_bam_unique)
    Basic.run(cmd=cmd, wkdir=self.prefix)
    cmd = "%s sort -@ 7 -o %s %s;\n" % (self.samtools, p_sort_bam, MT_bam_unique)
    Basic.run(cmd=cmd, wkdir=self.prefix)
    mpile_file = os.path.join(self.mpile_dir, sampleID + ".mpile.file")
    cmd = "%s mpileup -B -Q 20 -q 20 -d 1000000 -L 10000 -f %s %s > %s;" % (
        self.samtools, self.ref_genome, MT_bam_unique, mpile_file)
    Basic.run(cmd=cmd, wkdir=self.prefix)
    # step7. call heteroplasmy
    heteroplasmy_raw = os.path.join(oneSample_dir, sampleID + ".mp.raw")
    cmd = '%s -i %s -o %s;\n' % (self.het_raw, mpile_file, heteroplasmy_raw)
    Basic.run(cmd=cmd, wkdir=self.prefix)
    heteroplasmy_txt = os.path.join(self.mpile_dir, sampleID + ".heteroplasmy.txt")
    cmd = 'python2 %s --loose %s --chi %s -d %s --mle %s -i %s -o %s' \
        % (self.het_filter, "0.003,0.003", "0.0", "1000", "0", heteroplasmy_raw, heteroplasmy_txt)
    Basic.run(cmd=cmd, wkdir=self.prefix)
    print("finished heteroplasmy detection!")
    print(bamio.sample_meta_infor)
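# Hedged sketch of the mismatch filter applied in step5 of new_process():
# bamIO.limit_mismatch is defined elsewhere in this repo, so this pysam version
# only illustrates the idea (keep reads whose NM tag, the edit distance to the
# reference, is at most mismatch_num).
import pysam

def limit_mismatch_sketch(input_bam, output_bam, mismatch_num=1):
    with pysam.AlignmentFile(input_bam, "rb") as src, \
         pysam.AlignmentFile(output_bam, "wb", template=src) as dst:
        for read in src:
            if read.is_unmapped:
                continue
            if read.has_tag("NM") and read.get_tag("NM") <= mismatch_num:
                dst.write(read)
    pysam.index(output_bam)  # index the filtered bam, mirroring the samtools index steps above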