예제 #1
0
    def mpileup(self, chr_name):
        """Call variants for one region with bcftools, then index the VCF.

        The first shell pipeline runs ``bcftools mpileup`` on the parent,
        bulk1 and bulk2 BAM files under ``<out>/20_bam``, pipes the result
        through ``bcftools call`` and ``bcftools filter`` and writes
        ``<out>/30_vcf/qtlseq.<chr_name>.vcf.gz``.  The second command runs
        ``tabix`` on that file.  The redirections at the end of each command
        append to per-region log files under ``<out>/log``.

        Args:
            chr_name: Region passed to ``bcftools mpileup -r``; it is also
                embedded in the VCF and log file names.

        If either command exits with a non-zero status, the command is
        handed to ``call_log`` and the process exits with status 1.
        """
        variant_cmd = 'bcftools mpileup -a AD,ADF,ADR \
                                 -B \
                                 -q {0}\
                                 -Q {1} \
                                 -C {2} \
                                 -O u \
                                 -r {3} \
                                 -f {4} \
                                 --ignore-RG \
                                 {5}/20_bam/parent.bam \
                                 {5}/20_bam/bulk1.bam \
                                 {5}/20_bam/bulk2.bam | \
                bcftools call -vm \
                              -f GQ,GP \
                              -O u | \
                bcftools filter -i "INFO/MQ>={0}" \
                                -O z \
                                -o {5}/30_vcf/qtlseq.{3}.vcf.gz \
                                >> {5}/log/bcftools.{3}.log \
                                2>&1'.format(
            self.args.min_MQ,
            self.args.min_BQ,
            self.args.adjust_MQ,
            chr_name,
            self.args.ref,
            self.out)

        index_cmd = 'tabix -f \
                      -p vcf \
                      {0}/30_vcf/qtlseq.{1}.vcf.gz \
                      >> {0}/log/tabix.{1}.log \
                      2>&1'.format(self.out, chr_name)

        # Pair every command with the tool name reported on failure; the
        # commands are normalised up front and then executed in order.
        steps = [(tool, clean_cmd(raw))
                 for tool, raw in (('bcftools', variant_cmd),
                                   ('tabix', index_cmd))]

        quiet_checked = dict(stdout=sbp.DEVNULL,
                             stderr=sbp.DEVNULL,
                             shell=True,
                             check=True)

        for tool, shell_cmd in steps:
            try:
                sbp.run(shell_cmd, **quiet_checked)
            except sbp.CalledProcessError:
                call_log(self.out, tool, shell_cmd)
                sys.exit(1)
예제 #2
0
    def run(self, fastq1, fastq2, index):
        """Align one pair of FASTQ files with ``bwa mem`` and write a BAM file.

        The output of ``bwa mem`` is piped into ``samtools view -b``, which
        writes ``<out>/20_bam/<index>.bam``.  The redirection at the end of
        the pipeline appends to ``<out>/log/bwa.log``.

        Args:
            fastq1: First FASTQ file given to ``bwa mem``.
            fastq2: Second FASTQ file given to ``bwa mem``.
            index: Label used in the progress messages and as the BAM file
                name.

        If the pipeline exits with a non-zero status, the command is handed
        to ``call_log`` and the process exits with status 1.
        """
        print(time_stamp(),
              'start to align reads of {} by BWA.'.format(index),
              flush=True)

        cmd = 'bwa mem -t {0} {1} {2} {3} | \
               samtools view -b \
                             -o {4}/20_bam/{5}.bam \
                             >> {4}/log/bwa.log \
                             2>&1'.format(self.args.threads,
                                          self.args.ref,
                                          fastq1,
                                          fastq2,
                                          self.out,
                                          index)

        cmd = clean_cmd(cmd)

        try:
            sbp.run(cmd,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)
        except sbp.CalledProcessError:
            call_log(self.out, 'bwa', cmd)
            # A bare sys.exit() would terminate with status 0 and make the
            # failed alignment look successful to the calling process.
            sys.exit(1)

        print(time_stamp(),
              'alignment {} successfully finished.'.format(index),
              flush=True)
예제 #3
0
    def qtlplot(self):
        """Run ``qtlplot`` on the merged VCF and echo its output line by line.

        The command reads ``<out>/30_vcf/qtlseq.vcf.gz`` and is given
        ``<out>/40_qtlseq`` as its ``-o`` argument.  ``-e`` and ``--species``
        are appended only when the corresponding arguments are not ``None``.
        stderr of the child is merged into stdout, and every line is printed
        as soon as it is read.
        """
        cmd = 'qtlplot -v {0}/30_vcf/qtlseq.vcf.gz \
                           -F {1} \
                           -n1 {2} \
                           -n2 {3} \
                           -t {4} \
                           -w {5} \
                           -s {6} \
                           -N {7} \
                           -D {8} \
                           -d {9} \
                           -o {0}/40_qtlseq'.format(self.out,
                                                    self.args.filial,
                                                    self.args.N_bulk1,
                                                    self.args.N_bulk2,
                                                    self.args.threads,
                                                    self.args.window,
                                                    self.args.step,
                                                    self.args.N_rep,
                                                    self.args.max_depth,
                                                    self.args.min_depth)
        if self.args.snpEff is not None:
            cmd += ' -e {}'.format(self.args.snpEff)

        if self.args.species is not None:
            cmd += ' --species {}'.format(self.args.species)

        cmd = clean_cmd(cmd)

        # The context manager closes the stdout pipe and waits for the child
        # on exit, so the process is reaped and the file descriptor released.
        # NOTE(review): the exit status of qtlplot is still not inspected,
        # exactly as before.
        with sbp.Popen(cmd,
                       stdout=sbp.PIPE,
                       stderr=sbp.STDOUT,
                       shell=True) as p:
            for line in iter(p.stdout.readline, b''):
                print(line.rstrip().decode('utf-8'), flush=True)
예제 #4
0
    def merge(self, label):
        """Merge the per-flag BAM files and logs of one sample.

        Steps, in order:

        1. concatenate the four per-flag samtools logs (f83, f99, f147,
           f163) into ``<out>/log/samtools.<label>.log``;
        2. remove ``<out>/20_bam/<label>.bam``;
        3. remove the per-flag logs;
        4. ``samtools merge`` the four per-flag BAM files into
           ``<out>/20_bam/<label>.filt.bam``;
        5. remove the four per-flag BAM files.

        Args:
            label: Sample label embedded in every file name.

        Raises:
            subprocess.CalledProcessError: If one of the ``cat``/``rm``
                commands exits with a non-zero status.

        If ``samtools merge`` fails, the command is handed to ``call_log``
        and the process exits with status 1.
        """
        flags = ('83', '99', '147', '163')
        quiet_checked = dict(stdout=sbp.DEVNULL,
                             stderr=sbp.DEVNULL,
                             shell=True,
                             check=True)

        cat_logs = 'cat {0}/log/samtools.{1}.f83.log \
                    {0}/log/samtools.{1}.f99.log \
                    {0}/log/samtools.{1}.f147.log \
                    {0}/log/samtools.{1}.f163.log \
                    > {0}/log/samtools.{1}.log'.format(self.out, label)

        rm_input_bam = 'rm -f {0}/20_bam/{1}.bam'.format(self.out, label)
        rm_flag_logs = 'rm -f {0}/log/samtools.{1}.f*.log'.format(self.out,
                                                                  label)

        merge_cmd = 'samtools merge -f {0}/20_bam/{1}.filt.bam \
                                  {0}/20_bam/{1}.f83.bam \
                                  {0}/20_bam/{1}.f99.bam \
                                  {0}/20_bam/{1}.f147.bam \
                                  {0}/20_bam/{1}.f163.bam \
                                  >> {0}/log/samtools.{1}.log \
                                  2>&1'.format(self.out, label)

        rm_flag_bams = ['rm -f {}/20_bam/{}.f{}.bam'.format(self.out,
                                                            label,
                                                            flag)
                        for flag in flags]

        prepare_cmds = [clean_cmd(c)
                        for c in (cat_logs, rm_input_bam, rm_flag_logs)]
        merge_cmd = clean_cmd(merge_cmd)
        rm_flag_bams = [clean_cmd(c) for c in rm_flag_bams]

        for prepare_cmd in prepare_cmds:
            sbp.run(prepare_cmd, **quiet_checked)

        try:
            sbp.run(merge_cmd, **quiet_checked)
        except sbp.CalledProcessError:
            call_log(self.out, 'samtools', merge_cmd)
            # A bare sys.exit() would terminate with status 0 and hide the
            # failed merge from the calling process.
            sys.exit(1)

        # The per-flag BAM files are removed only after a successful merge.
        for rm_cmd in rm_flag_bams:
            sbp.run(rm_cmd, **quiet_checked)
예제 #5
0
    def run(self):
        """Index the reference FASTA with ``bwa index`` and ``samtools faidx``.

        Output of the two commands is appended to ``<out>/log/bwa.log`` and
        ``<out>/log/samtools.log`` respectively.

        If either command exits with a non-zero status, that command is
        handed to ``call_log`` and the process exits with status 1.
        """
        print(time_stamp(), 'start to index reference fasta.', flush=True)

        cmd1 = 'bwa index {} \
                >> {}/log/bwa.log \
                2>&1'.format(self.args.ref, self.out)

        cmd2 = 'samtools faidx {} \
                >> {}/log/samtools.log \
                2>&1'.format(self.args.ref, self.out)

        cmd1 = clean_cmd(cmd1)
        cmd2 = clean_cmd(cmd2)

        try:
            sbp.run(cmd1,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)
        except sbp.CalledProcessError:
            call_log(self.out, 'bwa', cmd1)
            # Exit non-zero: a bare sys.exit() would report success.
            sys.exit(1)

        try:
            sbp.run(cmd2,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)
        except sbp.CalledProcessError:
            # Report the samtools command that actually failed, not the
            # bwa command that already succeeded above.
            call_log(self.out, 'samtools', cmd2)
            sys.exit(1)

        print(time_stamp(),
              'indexing of reference successfully finished.',
              flush=True)
예제 #6
0
    def mkindex(self):
        """Build a tabix index for ``<out>/30_vcf/qtlseq.vcf.gz``.

        Output of ``tabix`` is appended to ``<out>/log/tabix.log``.  If the
        command exits with a non-zero status, it is handed to ``call_log``
        and the process exits with status 1.
        """
        index_cmd = clean_cmd('tabix -f \
                     -p vcf \
                     {0}/30_vcf/qtlseq.vcf.gz \
                     >> {0}/log/tabix.log \
                     2>&1'.format(self.out))

        quiet_checked = dict(stdout=sbp.DEVNULL,
                             stderr=sbp.DEVNULL,
                             shell=True,
                             check=True)

        try:
            sbp.run(index_cmd, **quiet_checked)
        except sbp.CalledProcessError:
            call_log(self.out, 'tabix', index_cmd)
            sys.exit(1)
예제 #7
0
    def run_snpEff(self):
        """Annotate ``self.vcf`` with ``snpEff ann`` and point args at the result.

        The annotated VCF is written to ``<out>/qtlseq.snpEff.vcf``, stderr of
        the command to ``<out>/snpEff.log`` and the summary to
        ``<out>/snpEff_summary.html``.  After the command succeeds,
        ``self.args.vcf`` is set to the annotated file.

        Raises:
            subprocess.CalledProcessError: If the command exits with a
                non-zero status; ``self.args.vcf`` is then left unchanged.
        """
        annotate_cmd = clean_cmd('snpEff ann -s {0}/snpEff_summary.html \
                          {1} \
                          {2} \
                          1> {0}/qtlseq.snpEff.vcf \
                          2> {0}/snpEff.log'.format(self.out,
                                                    self.snpEff,
                                                    self.vcf))
        annotated_vcf = '{}/qtlseq.snpEff.vcf'.format(self.out)

        print(time_stamp(), 'start to run SnpEff.', flush=True)
        sbp.run(annotate_cmd,
                shell=True,
                check=True,
                stdout=sbp.DEVNULL,
                stderr=sbp.DEVNULL)
        print(time_stamp(), 'SnpEff successfully finished.', flush=True)

        # Only reached when snpEff succeeded.
        self.args.vcf = annotated_vcf
예제 #8
0
파일: bamfilt.py 프로젝트: hpli9527/QTL-seq
    def filt(self, label_with_flags):
        """Extract the reads of one sample that carry a given SAM flag.

        Runs ``samtools view -b -f <flag>`` on ``<out>/20_bam/<label>.bam``
        and writes ``<out>/20_bam/<label>.f<flag>.bam``.  Output of the
        command is appended to ``<out>/log/samtools.<label>.f<flag>.log``.

        Args:
            label_with_flags: Sequence whose first item is the flag passed
                to ``samtools view -f`` and whose second item is the sample
                label.

        If the command exits with a non-zero status, it is handed to
        ``call_log`` and the process exits with status 1.
        """
        flag = label_with_flags[0]
        label = label_with_flags[1]
        cmd = 'samtools view -b \
                             -f {0} \
                             -o {1}/20_bam/{2}.f{0}.bam \
                             {1}/20_bam/{2}.bam \
                             >> {1}/log/samtools.{2}.f{0}.log \
                             2>&1'.format(flag, self.out, label)

        cmd = clean_cmd(cmd)

        try:
            sbp.run(cmd,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)
        except sbp.CalledProcessError:
            call_log(self.out, 'samtools', cmd)
            # A bare sys.exit() would terminate with status 0 and make the
            # failed filtering step look successful.
            sys.exit(1)
예제 #9
0
    def run(self, fastq1, fastq2, index):
        """Align a FASTQ pair and write a duplicate-removed, filtered BAM file.

        One shell pipeline chains ``bwa mem``, ``samtools fixmate -m``,
        ``samtools sort``, ``samtools markdup -r`` and
        ``samtools view -b -f 2 -F 2048``; the last stage writes
        ``<out>/20_bam/<index>.bam``.  The redirection at the end of the
        pipeline appends to ``<out>/log/alignment.log``.

        Args:
            fastq1: First FASTQ file given to ``bwa mem``.
            fastq2: Second FASTQ file given to ``bwa mem``.
            index: Label used in the progress messages and as the BAM file
                name.

        If the pipeline exits with a non-zero status, the command is handed
        to ``call_log`` and the process exits with status 1.
        """
        start_msg = 'start to align reads of {} by BWA.'.format(index)
        print(time_stamp(), start_msg, flush=True)

        pipeline = clean_cmd('bwa mem -t {0} \
                       {1} {2} {3} | \
               samtools fixmate -m \
                                - \
                                - | \
               samtools sort -m {4} \
                             -@ {0} | \
               samtools markdup -r \
                                - \
                                - | \
               samtools view -b \
                             -f 2 \
                             -F 2048 \
                             -o {5}/20_bam/{6}.bam \
                             >> {5}/log/alignment.log \
                             2>&1'.format(
            self.args.threads,
            self.args.ref,
            fastq1,
            fastq2,
            self.args.mem,
            self.out,
            index))

        quiet_checked = dict(stdout=sbp.DEVNULL,
                             stderr=sbp.DEVNULL,
                             shell=True,
                             check=True)

        try:
            sbp.run(pipeline, **quiet_checked)
        except sbp.CalledProcessError:
            call_log(self.out, 'alignment', pipeline)
            sys.exit(1)

        done_msg = 'alignment {} successfully finished.'.format(index)
        print(time_stamp(), done_msg, flush=True)
예제 #10
0
    def concat(self):
        """Concatenate the per-region VCF files and logs, then remove the parts.

        Steps, in order:

        1. concatenate the per-region bcftools and tabix logs into
           ``<out>/log/bcftools.log`` and ``<out>/log/tabix.log``;
        2. ``bcftools concat`` every ``<out>/30_vcf/qtlseq.*.vcf.gz`` into
           ``<out>/30_vcf/qtlseq.vcf.gz``;
        3. remove the per-region VCF files, their ``.tbi`` files and the
           per-region logs.

        Raises:
            subprocess.CalledProcessError: If one of the ``cat``/``rm``
                commands exits with a non-zero status.

        If ``bcftools concat`` fails, the command is handed to ``call_log``
        and the process exits with status 1.
        """
        log_merges = [
            'cat {0}/log/bcftools.*.log > {0}/log/bcftools.log'.format(self.out),
            'cat {0}/log/tabix.*.log > {0}/log/tabix.log'.format(self.out),
        ]

        concat_cmd = 'bcftools concat -O z \
                                -o {0}/30_vcf/qtlseq.vcf.gz \
                                {0}/30_vcf/qtlseq.*.vcf.gz \
                                >> {0}/log/bcftools.log \
                                2>&1'.format(self.out)

        removals = [
            'rm -f {}/30_vcf/qtlseq.*.vcf.gz'.format(self.out),
            'rm -f {}/30_vcf/qtlseq.*.vcf.gz.tbi'.format(self.out),
            'rm -f {}/log/bcftools.*.log'.format(self.out),
            'rm -f {}/log/tabix.*.log'.format(self.out),
        ]

        # Normalise every command before anything is executed.
        log_merges = [clean_cmd(raw) for raw in log_merges]
        concat_cmd = clean_cmd(concat_cmd)
        removals = [clean_cmd(raw) for raw in removals]

        quiet_checked = dict(stdout=sbp.DEVNULL,
                             stderr=sbp.DEVNULL,
                             shell=True,
                             check=True)

        for shell_cmd in log_merges:
            sbp.run(shell_cmd, **quiet_checked)

        try:
            sbp.run(concat_cmd, **quiet_checked)
        except sbp.CalledProcessError:
            call_log(self.out, 'bcftools', concat_cmd)
            sys.exit(1)

        # The parts are deleted only after the concatenation succeeded.
        for shell_cmd in removals:
            sbp.run(shell_cmd, **quiet_checked)
예제 #11
0
    def run(self, fastq1, fastq2, index):
        """Trim a FASTQ pair with ``trimmomatic PE`` and align the trimmed reads.

        Trimmed and unpaired reads are written under ``<out>/00_fastq``.
        The ``ILLUMINACLIP`` step is left out when that parameter is empty
        or still contains the ``<ADAPTER_FASTA>`` placeholder.  Output of
        trimmomatic is appended to ``<out>/log/trimmomatic.log``.  After
        trimming succeeds, ``Alignment(self.args).run`` is called with the
        two trimmed files.

        Args:
            fastq1: First FASTQ file of the pair.
            fastq2: Second FASTQ file of the pair.
            index: Label used in the output file names and passed on to the
                alignment step.

        If trimmomatic exits with a non-zero status, the command is handed
        to ``call_log`` and the process exits with status 1.
        """
        print(time_stamp(),
              'start trimming for {} and {}.'.format(fastq1, fastq2),
              flush=True)

        trim1 = '{}/00_fastq/{}.1.trim.fastq.gz'.format(self.out, index)
        trim2 = '{}/00_fastq/{}.2.trim.fastq.gz'.format(self.out, index)
        unpaired1 = '{}/00_fastq/{}.1.unpaired.fastq.gz'.format(
            self.out, index)
        unpaired2 = '{}/00_fastq/{}.2.unpaired.fastq.gz'.format(
            self.out, index)

        adapter_clip = self.trim_params['ILLUMINACLIP']

        if (not adapter_clip) or ('<ADAPTER_FASTA>' in adapter_clip):
            # No usable adapter setting: run without the ILLUMINACLIP step.
            cmd = 'trimmomatic PE -threads {} \
                                  -phred{} {} {} {} {} {} {} \
                                  LEADING:{} \
                                  TRAILING:{} \
                                  SLIDINGWINDOW:{} \
                                  MINLEN:{} \
                                  >> {}/log/trimmomatic.log \
                                  2>&1'.format(
                self.args.threads, self.trim_params['phred'], fastq1, fastq2,
                trim1, unpaired1, trim2, unpaired2,
                self.trim_params['LEADING'], self.trim_params['TRAILING'],
                self.trim_params['SLIDINGWINDOW'], self.trim_params['MINLEN'],
                self.out)
        else:
            cmd = 'trimmomatic PE -threads {} \
                                  -phred{} {} {} {} {} {} {} \
                                  ILLUMINACLIP:{} \
                                  LEADING:{} \
                                  TRAILING:{} \
                                  SLIDINGWINDOW:{} \
                                  MINLEN:{} \
                                  >> {}/log/trimmomatic.log \
                                  2>&1'.format(
                self.args.threads, self.trim_params['phred'], fastq1, fastq2,
                trim1, unpaired1, trim2, unpaired2,
                adapter_clip, self.trim_params['LEADING'],
                self.trim_params['TRAILING'],
                self.trim_params['SLIDINGWINDOW'], self.trim_params['MINLEN'],
                self.out)

        cmd = clean_cmd(cmd)

        try:
            sbp.run(cmd,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)
        except sbp.CalledProcessError:
            call_log(self.out, 'trimmomatic', cmd)
            # A bare sys.exit() would terminate with status 0 and make the
            # failed trimming step look successful.
            sys.exit(1)

        print(time_stamp(),
              'trimming for {} and {} successfully finished.'.format(
                  fastq1, fastq2),
              flush=True)

        aln = Alignment(self.args)
        aln.run(trim1, trim2, index)
예제 #12
0
    def merge(self):
        """Build one sorted, indexed ``<label>.filt.bam`` per sample group.

        For each of ``parent``, ``bulk1`` and ``bulk2``:

        1. if ``self.get_bams(label)`` returns exactly one file, symlink it
           to ``<out>/20_bam/<label>.unsorted.filt.bam``; otherwise
           ``samtools merge`` ``<out>/20_bam/<label>*.filt.bam`` into that
           file;
        2. ``samtools sort`` it into ``<out>/20_bam/<label>.filt.bam``;
        3. ``samtools index`` the sorted file;
        4. remove ``<out>/20_bam/<label>.*.filt.bam``.

        If one of steps 1-3 exits with a non-zero status, the command is
        handed to ``call_log`` and the process exits with status 1.

        Raises:
            subprocess.CalledProcessError: If the final ``rm`` exits with a
                non-zero status.
        """
        quiet_checked = dict(stdout=sbp.DEVNULL,
                             stderr=sbp.DEVNULL,
                             shell=True,
                             check=True)

        for label in ('parent', 'bulk1', 'bulk2'):
            bams = self.get_bams(label)
            if len(bams) != 1:
                gather_cmd = 'samtools merge -f {0}/20_bam/{1}.unsorted.filt.bam \
                                          {0}/20_bam/{1}*.filt.bam \
                                          >> {0}/log/samtools.log \
                                          2>&1'.format(self.out, label)
            else:
                # A single input needs no merge; link it under the name the
                # sort step reads.
                path_to_bam = os.path.abspath(bams[0])
                gather_cmd = 'ln -s {} {}/20_bam/{}.unsorted.filt.bam'.format(
                    path_to_bam, self.out, label)

            sort_cmd = 'samtools sort -m {0} \
                                  -@ {1} \
                                  -o {2}/20_bam/{3}.filt.bam \
                                  {2}/20_bam/{3}.unsorted.filt.bam \
                                  >> {2}/log/samtools.log \
                                  2>&1'.format(self.args.mem,
                                               self.args.threads,
                                               self.out,
                                               label)

            index_cmd = 'samtools index {0}/20_bam/{1}.filt.bam \
                                   >> {0}/log/samtools.log \
                                   2>&1'.format(self.out, label)

            cleanup_cmd = 'rm -f {}/20_bam/{}.*.filt.bam'.format(self.out,
                                                                 label)

            # Normalise all four commands before the first one is executed.
            guarded_cmds = [clean_cmd(raw)
                            for raw in (gather_cmd, sort_cmd, index_cmd)]
            cleanup_cmd = clean_cmd(cleanup_cmd)

            for shell_cmd in guarded_cmds:
                try:
                    sbp.run(shell_cmd, **quiet_checked)
                except sbp.CalledProcessError:
                    call_log(self.out, 'samtools', shell_cmd)
                    sys.exit(1)

            sbp.run(cleanup_cmd, **quiet_checked)