def run(self):
    """Call variants for every configured region with bcftools.

    For each region in ``glv.conf.region_bed_list`` the shell pipeline
    ``bcftools mpileup | bcftools call | bcftools filter`` is executed and
    its result written to ``<out_dir>/mpileup_<region>.1.vcf.gz``, which is
    then indexed through ``utl.tabix``.

    Every output path is appended to ``glv.outlist.outfile['mpileup']``,
    also when the step is skipped, so later phases can look the files up.
    """
    mod_name = 'mpileup'
    out_dir = utl.mk_outdir(mod_name)

    # Registry of the files this phase hands over to the next one.
    glv.outlist.outfile[mod_name] = list()

    start = time.time()

    for region in glv.conf.region_bed_list:

        region_vcf = "{}/{}_{}.1.vcf.gz".format(out_dir, mod_name, region)
        glv.outlist.outfile[mod_name].append(region_vcf)
        log.debug("{}".format(region_vcf))

        # Compared with "== False" on purpose: the helper's return type is
        # not visible from here, so plain truthiness could behave differently.
        if utl.progress_check(mod_name) == False:
            log.info("progress={} so skip {}.".format(
                glv.conf.progress, mod_name))
            continue

        log.info("go on {}".format(mod_name))

        # Stage 1: pileup of all BAMs over the region (uncompressed BCF).
        mpileup_cmd = 'bcftools mpileup {param} -O u -r {region} -f {ref} {bams}'.format(
            param=glv.conf.mpl_mpileup_param,
            region=region,
            ref=glv.conf.ref_fasta,
            bams=" ".join(glv.conf.bam_list))

        # Stage 2: variant calling (uncompressed BCF).
        call_cmd = 'bcftools call {param} -O u'.format(
            param=glv.conf.mpl_call_param)

        # Stage 3: filtering, written as compressed VCF.
        filter_cmd = 'bcftools filter {param} -O z --threads {threads} -o {out}'.format(
            param=glv.conf.mpl_filter_param,
            threads=glv.conf.thread,
            out=region_vcf)

        pipeline = " | ".join([mpileup_cmd, call_cmd, filter_cmd])

        # NOTE(review): presumably sets aside an already existing output
        # file before it is overwritten -- confirm in utl.
        utl.save_to_tmpfile(region_vcf)

        utl.try_exec(pipeline)
        utl.tabix(region_vcf)

    log.info("mpileup finished {}".format(
        utl.elapsed_time(time.time(), start)))
def main():
    """Run the IndelSnp pipeline once and log how long it took."""
    began_at = time.time()
    log.info('program started')

    # run
    pipeline = IndelSnp()
    pipeline.run()

    elapsed = utl.elapsed_time(time.time(), began_at)
    log.info("program finished {}".format(elapsed))
def run(self):
    """Concatenate the SNP and indel VCFs into one compressed VCF.

    The inputs are the files registered under
    ``glv.outlist.outfile['snpfilter']`` followed by those under
    ``glv.outlist.outfile['indelfilter']``.  The result is written to
    ``<out_dir>/concat.<basename of glv.conf.out_dir>.SNP_INDEL.<heterozygosity>.vcf.gz``,
    registered as the only entry of ``glv.outlist.outfile['concat']`` and
    indexed through ``utl.tabix``.

    When ``glv.conf.heterozygosity`` is ``'hetero'`` a single
    ``bcftools concat`` writes the file; otherwise its output is piped
    through an additional ``bcftools view``.
    """
    mod_name = 'concat'
    out_dir = utl.mk_outdir(mod_name)

    heterozygosity = glv.conf.heterozygosity
    run_label = os.path.basename(glv.conf.out_dir)

    merged_vcf = "{}/{}.{}.SNP_INDEL.{}.vcf.gz".format(
        out_dir, mod_name, run_label, heterozygosity)

    glv.outlist.outfile[mod_name] = [merged_vcf]
    log.debug("{}".format(merged_vcf))

    # SNP files first, then indel files, as one space separated argument list.
    snp_vcfs = glv.outlist.outfile['snpfilter']
    indel_vcfs = glv.outlist.outfile['indelfilter']
    all_vcf = " ".join(snp_vcfs + indel_vcfs)
    log.debug("{}".format(all_vcf))

    # NOTE(review): presumably sets aside an already existing output file
    # before it is overwritten -- confirm in utl.
    utl.save_to_tmpfile(merged_vcf)

    start = time.time()

    if heterozygosity == 'hetero':
        # Single step: concat writes the compressed result directly.
        command = 'bcftools concat {param} -O z {vcfs} --threads {threads} -o {out}'.format(
            param=glv.conf.concat_hetero_param,
            vcfs=all_vcf,
            threads=glv.conf.thread,
            out=merged_vcf)
    else:
        # Two steps: concat emits plain VCF that "bcftools view" then
        # filters and compresses.
        concat_cmd = 'bcftools concat {param} -O v {vcfs} --threads {threads}'.format(
            param=glv.conf.concat_nh_param,
            vcfs=all_vcf,
            threads=glv.conf.thread)
        view_cmd = 'bcftools view {param} -O z -o {out}'.format(
            param=glv.conf.concat_nh_view_param,
            out=merged_vcf)
        command = " | ".join([concat_cmd, view_cmd])

    utl.try_exec(command)
    utl.tabix(merged_vcf)

    log.info("concat finished {}".format(
        utl.elapsed_time(time.time(), start)))
def run(self):
    """Clean, annotate and normalize the svaba indel VCF of every region.

    Inputs are the files in ``glv.outlist.outfile['svaba']``, paired
    positionally with ``glv.conf.region_bed_list``.  Per region:

    1. The gzipped input is rewritten as plain text, keeping the first nine
       columns of each record and only those later columns that contain a
       ``/``; the copy is then compressed with ``bgzip`` and indexed.
    2. ``bcftools view | bcftools annotate`` produces the ``annote`` file.
    3. ``bcftools norm`` produces the ``norm`` file; the ``annote`` file and
       its index are deleted afterwards.
    4. Only when ``glv.conf.heterozygosity`` is ``'h**o'``: a second
       ``bcftools view`` produces the ``h**o`` file; the ``norm`` file and
       its index are deleted afterwards.

    The final file of each region (``h**o`` or ``norm``) is appended to
    ``glv.outlist.outfile['indelfilter']``, also when the step is skipped.
    """
    mod_name = 'indelfilter'
    out_dir = utl.mk_outdir(mod_name)

    heterozygosity = glv.conf.heterozygosity
    final_is_h_file = heterozygosity == 'h**o'

    def _remove_with_index(vcf_gz):
        # Delete an intermediate VCF together with its .tbi index and log it.
        index = "{}.tbi".format(vcf_gz)
        os.remove(vcf_gz)
        os.remove(index)
        log.info("remove {} {}".format(vcf_gz, index))

    # Registry of the files this phase hands over to the next one.
    glv.outlist.outfile[mod_name] = list()

    start = time.time()

    region_inputs = zip(glv.outlist.outfile['svaba'],
                        glv.conf.region_bed_list)

    for (input_file, region) in region_inputs:

        # Cleaned copy of the svaba output, stored under this step's
        # directory with the same base name (with and without ".gz").
        input_base_gz = os.path.basename(input_file)
        input_base = re.sub(r"\.gz$", "", input_base_gz)
        out_file0 = "{}/{}".format(out_dir, input_base)
        out_file0_gz = "{}/{}".format(out_dir, input_base_gz)

        region_stem = "{}/{}_{}".format(out_dir, mod_name, region)
        out_file1 = "{}.{}{}".format(region_stem, 'annote', '.vcf.gz')
        out_file2 = "{}.{}{}".format(region_stem, 'norm', '.vcf.gz')
        out_file3 = "{}.{}{}".format(region_stem, 'h**o', '.vcf.gz')

        glv.outlist.outfile[mod_name].append(
            out_file3 if final_is_h_file else out_file2)

        # Compared with "== False" on purpose: the helper's return type is
        # not visible from here, so plain truthiness could behave differently.
        if utl.progress_check(mod_name) == False:
            log.info("progress={} so skip {}.".format(
                glv.conf.progress, mod_name))
            continue

        log.info("go on {}".format(mod_name))

        # Workaround for a svaba output problem (per the original comment):
        # header lines are copied unchanged; in records, columns after the
        # ninth are dropped unless they contain '/'.
        with open(out_file0, mode='w') as cleaned, \
                gzip.open(input_file, "rt") as raw:
            for raw_line in raw:
                record = raw_line.strip()
                if not record.startswith('#'):
                    kept = [
                        field
                        for col_n, field in enumerate(record.split('\t'))
                        if col_n <= 8 or '/' in field
                    ]
                    record = '\t'.join(kept)
                cleaned.write("{}\n".format(record))

        # NOTE(review): presumably sets aside an already existing .gz before
        # bgzip recreates it -- confirm in utl.
        utl.save_to_tmpfile(out_file0_gz)

        bgzip_cmd = "bgzip -@ {} {}".format(glv.conf.thread, out_file0)
        utl.try_exec(bgzip_cmd)
        utl.tabix(out_file0_gz)

        # view (region subset, plain VCF) | annotate (compressed VCF).
        # --threads only has an effect with -O z|b.
        view_cmd = 'bcftools view {param} -O v -r {region} {src}'.format(
            param=glv.conf.indf_view1_param,
            region=region,
            src=out_file0_gz)
        annotate_cmd = 'bcftools annotate {param} -O z --threads {threads} -o {out}'.format(
            param=glv.conf.indf_annotate_param,
            threads=glv.conf.thread,
            out=out_file1)

        utl.try_exec(" | ".join([view_cmd, annotate_cmd]))
        utl.tabix(out_file1)

        # Normalize against the reference.
        norm_cmd = 'bcftools norm {param} -O z --threads {threads} -f {ref} -o {out} {src}'.format(
            param=glv.conf.indf_norm_param,
            threads=glv.conf.thread,
            ref=glv.conf.ref_fasta,
            out=out_file2,
            src=out_file1)

        utl.try_exec(norm_cmd)
        utl.tabix(out_file2)

        # The annotated file was only an intermediate.
        _remove_with_index(out_file1)

        #-------------------------
        if not final_is_h_file:
            continue

        final_view_cmd = 'bcftools view {param} -O z --threads {threads} -r {region} -o {out} {src}'.format(
            param=glv.conf.indf_view2_param,
            threads=glv.conf.thread,
            region=region,
            out=out_file3,
            src=out_file2)

        # NOTE: out_file3 is not passed through utl.save_to_tmpfile here;
        # that call was already disabled in the original code.
        utl.try_exec(final_view_cmd)
        utl.tabix(out_file3)

        # The normalized file was only an intermediate in this mode.
        _remove_with_index(out_file2)

    log.info("indelfilter finished {}".format(
        utl.elapsed_time(time.time(), start)))
def run(self):
    """Run ``svaba run`` for every configured region and compress its VCFs.

    The working directory is switched to the step's output directory while
    svaba runs and is restored to ``glv.conf.cwd`` afterwards, also when a
    command fails, so a failure does not leave the process in the output
    directory.

    For each region in ``glv.conf.region_bed_list``:

    * ``<out_dir>/indel_<region>.svaba.indel.vcf.gz`` is appended to
      ``glv.outlist.outfile['svaba']`` (also when the step is skipped);
    * ``svaba run`` is executed with all BAMs from ``glv.conf.bam_list``
      and, if ``glv.conf.svaba_normalize_bams_list`` is non-empty, one
      ``-n <bam>`` option per normalize BAM;
    * every ``*.vcf`` found in the output directory is compressed with
      ``bgzip`` and indexed through ``utl.tabix``.
    """
    mod_name = 'svaba'
    out_dir = utl.mk_outdir(mod_name)

    # Registry of the files this phase hands over to the next one.
    glv.outlist.outfile[mod_name] = list()

    start = time.time()

    # The normalize option does not depend on the region, so build it once.
    normalize_bams = glv.conf.svaba_normalize_bams_list
    if normalize_bams:
        norm = "-n " + " -n ".join(normalize_bams)
    else:
        norm = ''

    os.chdir(out_dir)
    try:
        for region in glv.conf.region_bed_list:

            # NOTE(review): the prefix embeds out_dir although the process
            # already sits inside it; this presumably relies on
            # utl.mk_outdir returning an absolute path -- confirm.
            title = "{}/indel_{}".format(out_dir, region)

            # Only the indel VCF is handed to the next phase.
            glv.outlist.outfile[mod_name].append(
                "{}.{}{}".format(title, 'svaba.indel.vcf', '.gz'))

            log.debug("{}".format(norm))

            # Compared with "== False" on purpose: the helper's return type
            # is not visible from here, so plain truthiness could behave
            # differently.
            if utl.progress_check(mod_name) == False:
                log.info("progress={} so skip {}.".format(
                    glv.conf.progress, mod_name))
                continue

            log.info("go on {}".format(mod_name))

            svaba = '{} {} -t {} -G {} -k {} {} -p {} {} -a {}'
            cmd1 = svaba.format(
                'svaba', 'run',
                " -t ".join(glv.conf.bam_list),
                glv.conf.ref_fasta,
                region,
                norm,
                glv.conf.thread,
                glv.conf.svb_svaba_param,
                title)

            utl.try_exec(cmd1)

            # Compress and index whatever uncompressed VCFs are present in
            # the output directory at this point.
            target_vcfs = utl.check_for_files("{}/*.vcf".format(out_dir))
            log.debug("{}".format(target_vcfs))

            for t_vcf in target_vcfs:
                cmd2 = "bgzip -@ {} {}".format(glv.conf.thread, t_vcf)
                utl.try_exec(cmd2)
                utl.tabix("{}{}".format(t_vcf, '.gz'))
    finally:
        # Always leave the output directory again, even if a command above
        # raised or terminated the run.
        os.chdir(glv.conf.cwd)

    log.info("svaba finished {}".format(
        utl.elapsed_time(time.time(), start)))
def run(self):
    """Annotate (and optionally re-filter) the mpileup VCF of every region.

    Inputs are the files in ``glv.outlist.outfile['mpileup']``, paired
    positionally with ``glv.conf.region_bed_list``.  Per region,
    ``bcftools view | bcftools annotate`` produces the ``annote`` file.
    Only when ``glv.conf.heterozygosity`` is ``'h**o'`` a second
    ``bcftools view`` produces the ``h**o`` file, after which the ``annote``
    file and its index are deleted.

    The final file of each region (``h**o`` or ``annote``) is appended to
    ``glv.outlist.outfile['snpfilter']``, also when the step is skipped.
    """
    mod_name = 'snpfilter'
    out_dir = utl.mk_outdir(mod_name)

    heterozygosity = glv.conf.heterozygosity
    final_is_h_file = heterozygosity == 'h**o'

    # Registry of the files this phase hands over to the next one.
    glv.outlist.outfile[mod_name] = list()

    start = time.time()

    region_inputs = zip(glv.outlist.outfile['mpileup'],
                        glv.conf.region_bed_list)

    for (input_file, region) in region_inputs:

        region_stem = "{}/{}_{}".format(out_dir, mod_name, region)
        annotated_vcf = "{}.{}{}".format(region_stem, 'annote', '.vcf.gz')
        final_h_vcf = "{}.{}{}".format(region_stem, 'h**o', '.vcf.gz')

        glv.outlist.outfile[mod_name].append(
            final_h_vcf if final_is_h_file else annotated_vcf)

        # Compared with "== False" on purpose: the helper's return type is
        # not visible from here, so plain truthiness could behave differently.
        if utl.progress_check(mod_name) == False:
            log.info("progress={} so skip {}.".format(
                glv.conf.progress, mod_name))
            continue

        log.info("go on {}".format(mod_name))

        # view (region subset, plain VCF) | annotate (compressed VCF).
        view_cmd = 'bcftools view {param} -O v -r {region} {src}'.format(
            param=glv.conf.snpf_view1_param,
            region=region,
            src=input_file)
        annotate_cmd = 'bcftools annotate {param} -O z --threads {threads} -o {out}'.format(
            param=glv.conf.snpf_annotate_param,
            threads=glv.conf.thread,
            out=annotated_vcf)

        utl.try_exec(" | ".join([view_cmd, annotate_cmd]))
        utl.tabix(annotated_vcf)

        #-------------------------
        if final_is_h_file:
            second_view_cmd = 'bcftools view {param} -O z --threads {threads} -r {region} -o {out} {src}'.format(
                param=glv.conf.snpf_view2_param,
                threads=glv.conf.thread,
                region=region,
                out=final_h_vcf,
                src=annotated_vcf)

            # NOTE(review): presumably sets aside an already existing output
            # file before it is overwritten -- confirm in utl.
            utl.save_to_tmpfile(final_h_vcf)

            utl.try_exec(second_view_cmd)
            utl.tabix(final_h_vcf)

            # The annotated file was only an intermediate in this mode.
            annotated_index = "{}.tbi".format(annotated_vcf)
            os.remove(annotated_vcf)
            os.remove(annotated_index)
            log.info("remove {} {}".format(annotated_vcf, annotated_index))

    log.info("snpfilter finished {}".format(
        utl.elapsed_time(time.time(), start)))