Example #1
0
    def run(self):
        """Run the bcftools mpileup | call | filter pipeline for each region.

        For every region in ``glv.conf.region_bed_list`` the expected output
        path is appended to ``glv.outlist.outfile['mpileup']`` before the
        progress check, so the path is registered even when the step itself
        is skipped.  When the step is not skipped, the three bcftools commands
        are joined with shell pipes, handed to ``utl.try_exec`` and the
        resulting ``.vcf.gz`` is passed to ``utl.tabix``.
        """
        mod_name = 'mpileup'
        out_dir = utl.mk_outdir(mod_name)

        # Output paths collected here are read by later phases.
        glv.outlist.outfile[mod_name] = list()

        start = time.time()

        for region in glv.conf.region_bed_list:

            region_vcf = "{}/{}_{}.{}{}".format(
                out_dir, mod_name, region, 1, '.vcf.gz')
            glv.outlist.outfile[mod_name].append(region_vcf)
            log.debug("{}".format(region_vcf))

            # The explicit "== False" comparison is kept as is: the return
            # type of utl.progress_check is not visible from this block.
            if utl.progress_check(mod_name) == False:
                log.info("progress={} so skip {}.".format(
                    glv.conf.progress,
                    mod_name))
                continue

            log.info("go on {}".format(mod_name))

            # Stage 1: pileup over this region for all BAMs (uncompressed
            # output, -O u, because it is piped to the next stage).
            pileup_stage = '{} {} {} -O u -r {} -f {} {}'.format(
                'bcftools',
                'mpileup',
                glv.conf.mpl_mpileup_param,
                region,
                glv.conf.ref_fasta,
                " ".join(glv.conf.bam_list))

            # Stage 2: variant calling, still uncompressed for the pipe.
            call_stage = '{} {} {} -O u'.format(
                'bcftools',
                'call',
                glv.conf.mpl_call_param)

            # Stage 3: filtering; this stage writes the final file (-O z).
            filter_stage = '{} {} {} -O z --threads {} -o {}'.format(
                'bcftools',
                'filter',
                glv.conf.mpl_filter_param,
                glv.conf.thread,
                region_vcf)

            pipeline = "{} | {} | {}".format(
                pileup_stage, call_stage, filter_stage)

            utl.save_to_tmpfile(region_vcf)
            utl.try_exec(pipeline)
            utl.tabix(region_vcf)

        elapsed = utl.elapsed_time(time.time(), start)
        log.info("mpileup finished {}".format(elapsed))
Example #2
0
def main():
    """Program entry point: build an ``IndelSnp`` and run it.

    Logs a start message, runs the pipeline object, then logs a finish
    message containing the elapsed time formatted by ``utl.elapsed_time``.
    """
    began = time.time()
    log.info('program started')

    # run
    IndelSnp().run()

    elapsed = utl.elapsed_time(time.time(), began)
    log.info("program finished {}".format(elapsed))
Example #3
0
    def run(self):
        """Concatenate the snpfilter and indelfilter VCFs into one file.

        The output path is built from the basename of ``glv.conf.out_dir``
        and the configured heterozygosity, and is registered as the only
        entry of ``glv.outlist.outfile['concat']``.  Two command shapes exist:

        * heterozygosity equal to ``'hetero'``: a single ``bcftools concat``
          writes the compressed output directly;
        * anything else: ``bcftools concat`` emits plain VCF that is piped
          through ``bcftools view``, which writes the compressed output.

        The command is run through ``utl.try_exec`` and the result is passed
        to ``utl.tabix``.
        """
        mod_name = 'concat'
        out_dir = utl.mk_outdir(mod_name)
        heterozygosity = glv.conf.heterozygosity

        dname = os.path.basename(glv.conf.out_dir)

        merged_vcf = "{}/{}.{}.{}.{}{}".format(
            out_dir, mod_name, dname, 'SNP_INDEL', heterozygosity, '.vcf.gz')

        glv.outlist.outfile[mod_name] = list()
        glv.outlist.outfile[mod_name].append(merged_vcf)
        log.debug("{}".format(merged_vcf))

        # SNP files first, then indel files, as one space-separated argument.
        inputs = glv.outlist.outfile['snpfilter'] \
            + glv.outlist.outfile['indelfilter']
        all_vcf = " ".join(inputs)
        log.debug("{}".format(all_vcf))

        utl.save_to_tmpfile(merged_vcf)

        start = time.time()

        if heterozygosity == 'hetero':
            # Single command: concat writes the compressed file itself.
            command = '{} {} {} -O z {} --threads {} -o {}'.format(
                'bcftools', 'concat',
                glv.conf.concat_hetero_param, all_vcf,
                glv.conf.thread, merged_vcf)
        else:
            # Two commands: concat to plain VCF, then view writes the file.
            concat_part = '{} {} {} -O v {} --threads {}'.format(
                'bcftools', 'concat',
                glv.conf.concat_nh_param, all_vcf,
                glv.conf.thread)
            view_part = '{} {} {} -O z -o {}'.format(
                'bcftools', 'view',
                glv.conf.concat_nh_view_param, merged_vcf)
            command = "{} | {}".format(concat_part, view_part)

        utl.try_exec(command)
        utl.tabix(merged_vcf)

        elapsed = utl.elapsed_time(time.time(), start)
        log.info("concat finished {}".format(elapsed))
Example #4
0
    def run(self):
        """Clean, annotate and normalise the per-region svaba indel VCFs.

        Inputs are the paths in ``glv.outlist.outfile['svaba']`` paired with
        ``glv.conf.region_bed_list``.  For each pair, when the step is not
        skipped:

        1. the gzipped input is rewritten as plain text, keeping the first
           nine columns of every record and, of the remaining columns, only
           those containing ``/``; it is then bgzipped and indexed;
        2. ``bcftools view | bcftools annotate`` produces the ``annote`` file;
        3. ``bcftools norm`` produces the ``norm`` file and the ``annote``
           file plus its index are removed;
        4. only when heterozygosity equals ``'h**o'``: a second
           ``bcftools view`` produces the final file and the ``norm`` file
           plus its index are removed.

        The path registered in ``glv.outlist.outfile['indelfilter']`` is the
        step-4 file in that case and the ``norm`` file otherwise.

        NOTE(review): the literal ``'h**o'`` is kept exactly as found.  It
        looks like a masked spelling, and its ``*`` characters also end up in
        a file name that is placed on a command line -- confirm the intended
        value against the configuration.
        """
        mod_name = 'indelfilter'
        out_dir = utl.mk_outdir(mod_name)
        heterozygosity = glv.conf.heterozygosity

        # Output paths collected here are read by later phases.
        glv.outlist.outfile[mod_name] = list()

        start = time.time()

        region_path = "{}/{}_{}.{}{}"
        second_view_needed = heterozygosity == 'h**o'

        pairs = zip(glv.outlist.outfile['svaba'], glv.conf.region_bed_list)
        for input_file, region in pairs:

            # Names of the cleaned copy of the svaba file (plain and gzipped).
            gz_name = os.path.basename(input_file)
            plain_name = re.sub(r"\.gz$", "", gz_name)
            cleaned_vcf = "{}/{}".format(out_dir, plain_name)
            cleaned_vcf_gz = "{}/{}".format(out_dir, gz_name)

            annotated_vcf = region_path.format(
                out_dir, mod_name, region, 'annote', '.vcf.gz')
            normalized_vcf = region_path.format(
                out_dir, mod_name, region, 'norm', '.vcf.gz')
            final_vcf = region_path.format(
                out_dir, mod_name, region, 'h**o', '.vcf.gz')

            glv.outlist.outfile[mod_name].append(
                final_vcf if second_view_needed else normalized_vcf)

            # The explicit "== False" comparison is kept as is: the return
            # type of utl.progress_check is not visible from this block.
            if utl.progress_check(mod_name) == False:
                log.info("progress={} so skip {}.".format(
                    glv.conf.progress, mod_name))
                continue

            log.info("go on {}".format(mod_name))

            # Workaround for a svaba output problem: header lines are copied,
            # record lines keep columns 0-8 and, beyond those, only the
            # columns that contain a '/'.
            with open(cleaned_vcf, mode='w') as dst, \
                    gzip.open(input_file, "rt") as src:
                for raw in src:
                    record = raw.strip()
                    if record.startswith('#'):
                        dst.write("{}\n".format(record))
                        continue
                    kept = [
                        field
                        for idx, field in enumerate(record.split('\t'))
                        if idx <= 8 or '/' in field
                    ]
                    dst.write("{}\n".format('\t'.join(kept)))

            utl.save_to_tmpfile(cleaned_vcf_gz)

            utl.try_exec(
                "bgzip -@ {} {}".format(glv.conf.thread, cleaned_vcf))
            utl.tabix(cleaned_vcf_gz)

            # Same path as cleaned_vcf_gz, built the same way as before.
            input_valid_vcf = "{}/{}".format(out_dir, gz_name)

            view_part = '{} {} {} -O v -r {} {}'.format(
                'bcftools', 'view',
                glv.conf.indf_view1_param, region,
                input_valid_vcf)

            # use threads only -O z|b
            annotate_part = '{} {} {} -O z --threads {} -o {}'.format(
                'bcftools', 'annotate',
                glv.conf.indf_annotate_param,
                glv.conf.thread, annotated_vcf)

            utl.try_exec('{} | {}'.format(view_part, annotate_part))
            utl.tabix(annotated_vcf)

            # use threads only -O z|b
            norm_cmd = '{} {} {} -O z --threads {} -f {} -o {} {}'.format(
                'bcftools', 'norm', glv.conf.indf_norm_param,
                glv.conf.thread, glv.conf.ref_fasta, normalized_vcf,
                annotated_vcf)

            utl.try_exec(norm_cmd)
            utl.tabix(normalized_vcf)

            # The annotated intermediate is no longer needed.
            annotated_tbi = "{}.tbi".format(annotated_vcf)
            for stale in (annotated_vcf, annotated_tbi):
                os.remove(stale)
            log.info("remove {} {}".format(annotated_vcf, annotated_tbi))

            #-------------------------
            if not second_view_needed:
                continue

            view2_cmd = '{} {} {} -O z --threads {} -r {} -o {} {}'.format(
                'bcftools', 'view', glv.conf.indf_view2_param,
                glv.conf.thread, region, final_vcf, normalized_vcf)

            # utl.save_to_tmpfile is not called for final_vcf here.
            utl.try_exec(view2_cmd)
            utl.tabix(final_vcf)

            # The normalised intermediate is no longer needed either.
            normalized_tbi = "{}.tbi".format(normalized_vcf)
            for stale in (normalized_vcf, normalized_tbi):
                os.remove(stale)

            log.info("remove {} {}".format(normalized_vcf, normalized_tbi))

        elapsed = utl.elapsed_time(time.time(), start)
        log.info("indelfilter finished {}".format(elapsed))
Example #5
0
    def run(self):
        """Run ``svaba run`` for each region and bgzip/index the VCFs it left.

        The process changes into the module's output directory for the
        duration of the step.  For every region in
        ``glv.conf.region_bed_list`` the expected indel VCF path
        (``<out_dir>/indel_<region>.svaba.indel.vcf.gz``) is appended to
        ``glv.outlist.outfile['svaba']`` before the progress check, so it is
        registered even when the step is skipped.  When the step is not
        skipped, svaba is executed and every ``*.vcf`` found in the output
        directory is bgzipped and passed to ``utl.tabix``.

        The working directory is restored to ``glv.conf.cwd`` in a
        ``finally`` clause, so a failure inside the loop no longer leaves the
        process in the svaba output directory.
        """
        mod_name = 'svaba'
        out_dir = utl.mk_outdir(mod_name)

        # Output paths collected here are read by later phases.
        glv.outlist.outfile[mod_name] = list()

        start = time.time()

        os.chdir(out_dir)
        try:
            for region in glv.conf.region_bed_list:

                title = "{}/indel_{}".format(out_dir, region)

                # The registered path is the same whether or not normal BAMs
                # are configured.
                glv.outlist.outfile[mod_name].append(
                    "{}.{}{}".format(title, 'svaba.indel.vcf', '.gz'))

                # The normal-BAM ("-n") option is passed as an empty string:
                # glv.conf.svaba_normalize_bams_list is not forwarded to
                # svaba, which matches the command this step has always
                # produced.
                # NOTE(review): confirm whether "-n" should be enabled; the
                # names of svaba's output files may differ when normals are
                # given, and the path registered above would then need to
                # change as well.
                norm = ''
                log.debug("{}".format(norm))

                # The explicit "== False" comparison is kept as is: the
                # return type of utl.progress_check is not visible here.
                if utl.progress_check(mod_name) == False:
                    log.info("progress={} so skip {}.".format(
                        glv.conf.progress,
                        mod_name))
                    continue

                log.info("go on {}".format(mod_name))

                svaba = '{} {} -t {} -G {} -k {} {} -p {} {} -a {}'
                cmd1 = svaba.format(
                    'svaba',
                    'run',
                    " -t ".join(glv.conf.bam_list),
                    glv.conf.ref_fasta,
                    region,
                    norm,
                    glv.conf.thread,
                    glv.conf.svb_svaba_param,
                    title)

                utl.try_exec(cmd1)

                # Compress and index every plain VCF currently present in
                # the output directory.
                target_vcfs = utl.check_for_files("{}/*.vcf".format(out_dir))
                log.debug("{}".format(target_vcfs))

                for t_vcf in target_vcfs:

                    cmd2 = "bgzip -@ {} {}".format(
                        glv.conf.thread,
                        t_vcf)

                    utl.try_exec(cmd2)
                    utl.tabix("{}{}".format(t_vcf, '.gz'))
        finally:
            # Always return to the original working directory, even when a
            # command above raised.
            os.chdir(glv.conf.cwd)

        log.info("svaba finished {}".format(
            utl.elapsed_time(time.time(), start)))
Example #6
0
    def run(self):
        """Filter and annotate the per-region mpileup VCFs.

        Inputs are the paths in ``glv.outlist.outfile['mpileup']`` paired
        with ``glv.conf.region_bed_list``.  For each pair, when the step is
        not skipped, ``bcftools view | bcftools annotate`` writes the
        ``annote`` file.  Only when heterozygosity equals ``'h**o'`` a second
        ``bcftools view`` writes the final file, after which the ``annote``
        file and its index are removed.

        The path registered in ``glv.outlist.outfile['snpfilter']`` is the
        second-view file in that case and the ``annote`` file otherwise.

        NOTE(review): the literal ``'h**o'`` is kept exactly as found.  It
        looks like a masked spelling, and its ``*`` characters also end up in
        a file name that is placed on a command line -- confirm the intended
        value against the configuration.
        """
        mod_name = 'snpfilter'
        out_dir = utl.mk_outdir(mod_name)
        heterozygosity = glv.conf.heterozygosity

        # Output paths collected here are read by later phases.
        glv.outlist.outfile[mod_name] = list()

        start = time.time()

        region_path = "{}/{}_{}.{}{}"
        second_view_needed = heterozygosity == 'h**o'

        pairs = zip(glv.outlist.outfile['mpileup'], glv.conf.region_bed_list)
        for input_file, region in pairs:

            annotated_vcf = region_path.format(
                out_dir, mod_name, region, 'annote', '.vcf.gz')
            final_vcf = region_path.format(
                out_dir, mod_name, region, 'h**o', '.vcf.gz')

            glv.outlist.outfile[mod_name].append(
                final_vcf if second_view_needed else annotated_vcf)

            # The explicit "== False" comparison is kept as is: the return
            # type of utl.progress_check is not visible from this block.
            if utl.progress_check(mod_name) == False:
                log.info("progress={} so skip {}.".format(
                    glv.conf.progress, mod_name))
                continue

            log.info("go on {}".format(mod_name))

            # First pass: region-restricted view piped into annotate, which
            # writes the compressed file.
            view_part = '{} {} {} -O v -r {} {}'.format(
                'bcftools', 'view',
                glv.conf.snpf_view1_param, region,
                input_file)
            annotate_part = '{} {} {} -O z --threads {} -o {}'.format(
                'bcftools', 'annotate',
                glv.conf.snpf_annotate_param,
                glv.conf.thread, annotated_vcf)

            utl.try_exec('{} | {}'.format(view_part, annotate_part))
            utl.tabix(annotated_vcf)

            #-------------------------
            if not second_view_needed:
                continue

            # Second pass: another view over the annotated file.
            view2_cmd = '{} {} {} -O z --threads {} -r {} -o {} {}'.format(
                'bcftools', 'view', glv.conf.snpf_view2_param,
                glv.conf.thread, region, final_vcf, annotated_vcf)

            utl.save_to_tmpfile(final_vcf)

            utl.try_exec(view2_cmd)
            utl.tabix(final_vcf)

            # The annotated intermediate is no longer needed.
            annotated_tbi = "{}.tbi".format(annotated_vcf)
            for stale in (annotated_vcf, annotated_tbi):
                os.remove(stale)
            log.info("remove {} {}".format(annotated_vcf, annotated_tbi))

        elapsed = utl.elapsed_time(time.time(), start)
        log.info("snpfilter finished {}".format(elapsed))