Ejemplo n.º 1
0
 def check_gff_extension(self):
     """Exit unless ``self.gff_extension`` is 'gff' or 'gtf'.

     For any other extension a message naming ``self.args.gff`` is printed
     to stdout and the process exits with status 1. Accepted extensions
     return silently.
     """
     if self.gff_extension in ('gff', 'gtf'):
         return

     stamp = time_stamp()
     message = "!!WARNING!! {}'s extension is not 'gff' or 'gtf'\n".format(
         self.args.gff)
     print(stamp, message, flush=True)
     sys.exit(1)
Ejemplo n.º 2
0
    def run_prinseq(self):
        """Run prinseq-lite.pl on every split FASTQ part, in parallel.

        ``seq`` prints the zero-padded part numbers 001..``self.N_threads``
        and ``xargs`` substitutes each one for ``%``, starting up to
        ``self.N_threads`` prinseq-lite.pl processes at a time. The paired
        parts are read from the FaQCs split directory, the reads that pass
        are written under ``<out>/20_fastq/prinseq_<index>/`` and the
        pipeline output goes to ``<out>/log/prinseq_<index>.log``.

        If the pipeline returns a non-zero status, the command is printed
        and the process exits with status 1.
        """
        cmd = 'seq -f %03g {0} | \
               xargs -P {0} \
                     -I % \
               prinseq-lite.pl -trim_left 5 \
                               -trim_right 20 \
                               -trim_qual_window 10 \
                               -trim_qual_right 20 \
                               -min_len 75 \
                               -min_qual_mean 20 \
                               -fastq {1}/20_fastq/FaQCs_{2}/{2}.1.trimmed.fastq.split/{2}.1.trimmed.part_%.fastq \
                               -fastq2 {1}/20_fastq/FaQCs_{2}/{2}.1.trimmed.fastq.split/{2}.2.trimmed.part_%.fastq \
                               -out_good {1}/20_fastq/prinseq_{2}/{2}.part_% \
                               -out_bad null \
                               > {1}/log/prinseq_{2}.log \
                               2>&1'.format(self.N_threads,
                                            self.args.out,
                                            self.index)

        # Every other shell-command method in this file passes its command
        # through clean_cmd before running or printing it; do the same here
        # so the literal's continuation padding is not executed/echoed as-is.
        cmd = clean_cmd(cmd)

        try:
            # The shell redirections above already capture the tool output;
            # anything left over is discarded.
            sbp.run(cmd,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)

        except sbp.CalledProcessError:
            print(time_stamp(), '!!ERROR!! {}\n'.format(cmd), flush=True)
            sys.exit(1)
Ejemplo n.º 3
0
    def alignment(self):
        """Run ``Alignment.run`` on every RNA-seq and whole-genome read pair.

        Each entry of ``self.args.rna_seq`` and ``self.args.whole_genome`` is
        split on ',' and its first two fields are taken as the FASTQ paths.
        Every pair receives a prefix (``RNA-seq.0NNN`` or ``WGS.0NNN``,
        numbered separately per library type) and is encoded as a single
        tab-separated string with the fields: first FASTQ, second FASTQ,
        prefix, 'RNA' or 'Genome', and the thread count for that job taken
        from the list returned by ``get_proc_numbers``. The strings are
        mapped over ``Alignment.run`` in a pool of ``N_process`` workers.

        Raises:
            IndexError: if an entry does not contain a ',' separator.
        """
        # Use the options this instance was built with instead of relying on
        # a module-level ``args`` global.
        aln = Alignment(self.args)

        N_files = len(self.args.rna_seq) + len(self.args.whole_genome)
        N_process, each_threads = get_proc_numbers(self.args.threads, N_files)

        # (input list, prefix stem, library label); RNA-seq jobs come first.
        libraries = ((self.args.rna_seq, 'RNA-seq', 'RNA'),
                     (self.args.whole_genome, 'WGS', 'Genome'))

        aln_args = []
        for fastqs, prefix, seq_type in libraries:
            for i, fastq in enumerate(fastqs):
                paths = fastq.split(',')
                fastq1, fastq2 = paths[0], paths[1]
                index = '{}.0{:0>3}'.format(prefix, i)
                aln_args.append('\t'.join([fastq1, fastq2, index, seq_type]))

                for path in (fastq1, fastq2):
                    print(time_stamp(),
                          "{}'s prefix -> {}.".format(path, index),
                          flush=True)

        # Append the per-job thread count as the last field.
        aln_args = [aln_arg + '\t' + str(each_threads[i])
                    for i, aln_arg in enumerate(aln_args)]

        p = Pool(N_process)
        try:
            p.map(aln.run, aln_args)
        finally:
            # Close the pool even when a job raises.
            p.close()

        print(time_stamp(),
              'alignment successfully finished.',
              flush=True)
Ejemplo n.º 4
0
 def run(self):
     """Run the annotation steps one after another.

     Prints a start message, then calls ``merge_bam``,
     ``transciptome_assembly``, ``make_gff`` and ``run_gffread`` in that
     order.
     """
     start_message = 'start to annotate the reference genome.'
     print(time_stamp(), start_message, flush=True)

     # Fixed step order.
     self.merge_bam()
     self.transciptome_assembly()
     self.make_gff()
     self.run_gffread()
Ejemplo n.º 5
0
    def run(self):
        """Map ``self.mpileup`` over the names from ``get_header`` in a pool.

        Prints a start message, fetches the names with ``self.get_header()``,
        runs ``self.mpileup`` once per name in a pool of
        ``self.args.threads`` workers, and finally calls ``self.concat()``
        and ``self.mkindex()``.
        """
        print(time_stamp(), 'start to call variants.', flush=True)
        chr_names = self.get_header()

        p = Pool(self.args.threads)
        try:
            p.map(self.mpileup, chr_names)
        finally:
            # Close the pool even when one of the jobs raises.
            p.close()

        self.concat()
        self.mkindex()
Ejemplo n.º 6
0
    def __init__(self, args):
        """Store ``args``, report the trimming mode and create output folders.

        Prints one notice per disabled trimming step
        (``args.disable_RNAseq_trim`` / ``args.disable_WGS_trim``) followed
        by a start message, then creates ``20_fastq`` and ``30_bam`` under
        ``args.out``, calls ``self.write_json()`` and creates ``40_bed``.
        ``os.mkdir`` is used, so an already existing folder raises.
        """
        self.args = args

        skip_rna_trim = self.args.disable_RNAseq_trim
        if skip_rna_trim:
            print(time_stamp(), 'disable the trimming of RNA-seq.', flush=True)

        skip_wgs_trim = self.args.disable_WGS_trim
        if skip_wgs_trim:
            print(time_stamp(), 'disable the trimming of WGS.', flush=True)

        # Trimming is only skipped entirely when both flags are set.
        if skip_rna_trim and skip_wgs_trim:
            start_message = 'start to align reads.'
        else:
            start_message = 'start to trim and align reads.'
        print(time_stamp(), start_message, flush=True)

        for folder in ('20_fastq', '30_bam'):
            os.mkdir('{}/{}'.format(self.args.out, folder))

        # 40_bed is created only after write_json() has run.
        self.write_json()
        os.mkdir('{0}/40_bed/'.format(self.args.out))
Ejemplo n.º 7
0
    def run(self):
        """Index the reference FASTA with hisat2-build and samtools faidx.

        Both commands write their output to a log file under
        ``<out>/log/``. They run one after the other; if either returns a
        non-zero status, ``call_log`` is invoked with the output directory,
        the step name and the command, and the process exits with status 1.
        """
        print(time_stamp(), 'start to index reference fasta.', flush=True)

        build_cmd = 'hisat2-build -p {0} {1} {1} \
                > {2}/log/hisat2-build.log \
                2>&1'.format(self.args.threads, self.args.ref, self.args.out)

        faidx_cmd = 'samtools faidx {} \
                > {}/log/samtools_faidx.log \
                2>&1'.format(self.args.ref, self.args.out)

        # (step name handed to call_log, cleaned command), in run order.
        steps = [('hisat2-build', clean_cmd(build_cmd)),
                 ('samtools_faidx', clean_cmd(faidx_cmd))]

        print(time_stamp(), 'hisat2-build...', flush=True)

        for step_name, command in steps:
            finished = sbp.run(command,
                               stdout=sbp.DEVNULL,
                               stderr=sbp.DEVNULL,
                               shell=True,
                               check=False)
            if finished.returncode != 0:
                call_log(self.args.out, step_name, command)
                sys.exit(1)

        print(time_stamp(),
              'indexing of the reference genome successfully finished.',
              flush=True)
Ejemplo n.º 8
0
 def check_max_threads(self, args):
     """Clamp ``args.threads`` to the CPU count reported by ``multi``.

     The CPU count is always printed. A request above the CPU count is
     reduced to it with a warning on stderr; a request below 1 is replaced
     by the CPU count without a warning. A request equal to the CPU count
     is left as it is and no longer triggers the warning.

     Args:
         args: options object with an integer ``threads`` attribute; it is
             modified in place.

     Returns:
         The same ``args`` object.
     """
     max_cpu = multi.cpu_count()
     print(
         time_stamp(),
         'maximum number of threads which you can use is up to {}.'.format(
             max_cpu),
         flush=True)

     # Strict comparison: asking for exactly max_cpu threads is a valid
     # request and must not be reported as exceeding the limit.
     if args.threads > max_cpu:
         sys.stderr.write(
             ('!!WARNING!! You can use up to {0} threads. '
              'This program will use {0} threads.\n').format(max_cpu))
         sys.stderr.flush()
         args.threads = max_cpu
     elif args.threads < 1:
         args.threads = max_cpu
     return args
Ejemplo n.º 9
0
    def remove_duplicates(self):
        """Merge the per-source candidate files into one de-duplicated file.

        Concatenates every ``candidate_genes_from_*.<ext>`` file in the
        output directory, keeps columns 1-9 and writes the unique, sorted
        lines to ``all_candidate_genes.<ext>``. If the pipeline returns a
        non-zero status, the command is printed and the process exits with
        status 1.
        """
        template = 'cat {0}/candidate_genes_from_*.{1} | \
               cut -f 1-9 | \
               sort -u > {0}/all_candidate_genes.{1}'
        command = clean_cmd(
            template.format(self.args.out, self.gff_extension))

        finished = sbp.run(command,
                           stdout=sbp.DEVNULL,
                           stderr=sbp.DEVNULL,
                           shell=True,
                           check=False)
        if finished.returncode != 0:
            print(time_stamp(), '!!ERROR!! {}\n'.format(command), flush=True)
            sys.exit(1)
Ejemplo n.º 10
0
    def filter_candidates(self):
        """Run ``jiji`` on the pipeline outputs and collect its log files.

        ``jiji`` is given the annotation GFF, the BED directory and the VCF
        from the output directory and writes to ``<out>/70_result``. If it
        returns a non-zero status, the command is printed and the process
        exits with status 1. Afterwards the two bedtools logs are moved
        from ``70_result`` into ``<out>/log/``.
        """
        template = 'jiji -a {0}/50_annotation/annotation.gff \
                    -b {0}/40_bed \
                    -v {0}/60_vcf/raiden.vcf.gz \
                    -o {0}/70_result'
        command = clean_cmd(template.format(self.args.out))

        finished = sbp.run(command,
                           stdout=sbp.DEVNULL,
                           stderr=sbp.DEVNULL,
                           shell=True,
                           check=False)
        if finished.returncode != 0:
            print(time_stamp(), '!!ERROR!! {}\n'.format(command), flush=True)
            sys.exit(1)

        for log_name in ('jiji_PA_bedtools.log', 'jiji_mut_bedtools.log'):
            source = '{0}/70_result/{1}'.format(self.args.out, log_name)
            shutil.move(source, '{0}/log/'.format(self.args.out))
Ejemplo n.º 11
0
    def gzip_prinseq(self):
        """Compress the four merged prinseq FASTQ files with pigz.

        The files are ``<index>_1``, ``<index>_2``, ``<index>_1_singletons``
        and ``<index>_2_singletons`` (all ``.fastq``) under
        ``<out>/20_fastq/prinseq_<index>/``. They are compressed one after
        another with ``self.N_threads`` pigz threads; the first command that
        returns a non-zero status is printed and the process exits with
        status 1.
        """
        template = 'pigz -p {0} \
                        {1}/20_fastq/prinseq_{2}/{2}_{3}.fastq'
        suffixes = ('1', '2', '1_singletons', '2_singletons')

        # Format every command first, then clean them, then run them.
        raw_cmds = [template.format(self.N_threads,
                                    self.args.out,
                                    self.index,
                                    suffix)
                    for suffix in suffixes]
        commands = [clean_cmd(raw) for raw in raw_cmds]

        for command in commands:
            finished = sbp.run(command,
                               stdout=sbp.DEVNULL,
                               stderr=sbp.DEVNULL,
                               shell=True,
                               check=False)
            if finished.returncode != 0:
                print(time_stamp(),
                      '!!ERROR!! {}\n'.format(command),
                      flush=True)
                sys.exit(1)
Ejemplo n.º 12
0
    def gzip_FaQCs(self):
        """Compress whichever FaQCs output FASTQ files exist, using pigz.

        The candidates are the ``1``, ``2``, ``discard`` and ``unpaired``
        ``.trimmed.fastq`` files under ``<out>/20_fastq/FaQCs_<index>/``.
        A file that is not present is skipped. The first pigz command that
        returns a non-zero status is printed and the process exits with
        status 1.
        """
        cmd_template = 'pigz -p {0} \
                     {1}/20_fastq/FaQCs_{2}/{2}.{3}.trimmed.fastq'
        path_template = '{0}/20_fastq/FaQCs_{1}/{1}.{2}.trimmed.fastq'
        labels = ('1', '2', 'discard', 'unpaired')

        # Format every command first, then clean them, then run them.
        raw_cmds = [cmd_template.format(self.N_threads,
                                        self.args.out,
                                        self.index,
                                        label)
                    for label in labels]
        commands = [clean_cmd(raw) for raw in raw_cmds]

        for label, command in zip(labels, commands):
            fastq = path_template.format(self.args.out, self.index, label)
            if not os.path.isfile(fastq):
                continue

            finished = sbp.run(command,
                               stdout=sbp.DEVNULL,
                               stderr=sbp.DEVNULL,
                               shell=True,
                               check=False)
            if finished.returncode != 0:
                print(time_stamp(),
                      '!!ERROR!! {}\n'.format(command),
                      flush=True)
                sys.exit(1)
Ejemplo n.º 13
0
def main():
    """Build ``Jiji`` from the module-level ``args`` and run it.

    A start message is printed before the run and a completion message
    after it.
    """
    print(time_stamp(), 'start to filter the causal genes.', flush=True)

    jiji = Jiji(args)
    jiji.run()

    done_message = 'Filtering process successfully finished.\n'
    print(time_stamp(), done_message, flush=True)
Ejemplo n.º 14
0
def main():
    """Build ``RaIDeN`` from the module-level ``args`` and run it.

    A start message is printed before the run and a completion message
    after it.
    """
    print(time_stamp(), 'start to run RaIDeN.', flush=True)

    pipeline = RaIDeN(args)
    pipeline.run()

    done_message = 'RaIDeN successfully finished.\n'
    print(time_stamp(), done_message, flush=True)
Ejemplo n.º 15
0
    def merge_fastq(self):
        """Concatenate the per-part prinseq outputs into four merged files.

        For each of the ``_1``, ``_2``, ``_1_singletons`` and
        ``_2_singletons`` groups, every ``<index>.part_*`` FASTQ under
        ``<out>/20_fastq/prinseq_<index>/`` is concatenated with ``cat``
        into a single ``<index>_<group>.fastq`` in the same directory. The
        first command that returns a non-zero status is printed and the
        process exits with status 1.
        """
        out_dir = self.args.out
        index = self.index

        # Format every command first, then clean them, then run them.
        raw_cmds = [
            'cat {0}/20_fastq/prinseq_{1}/{1}.part_*_1.fastq \
                  > {0}/20_fastq/prinseq_{1}/{1}_1.fastq'.format(
                out_dir, index),
            'cat {0}/20_fastq/prinseq_{1}/{1}.part_*_2.fastq > \
                    {0}/20_fastq/prinseq_{1}/{1}_2.fastq'.format(
                out_dir, index),
            'cat {0}/20_fastq/prinseq_{1}/{1}.part_*_1_singletons.fastq \
                  > {0}/20_fastq/prinseq_{1}/{1}_1_singletons.fastq'.format(
                out_dir, index),
            'cat {0}/20_fastq/prinseq_{1}/{1}.part_*_2_singletons.fastq \
                  > {0}/20_fastq/prinseq_{1}/{1}_2_singletons.fastq'.format(
                out_dir, index),
        ]
        commands = [clean_cmd(raw) for raw in raw_cmds]

        for command in commands:
            finished = sbp.run(command,
                               stdout=sbp.DEVNULL,
                               stderr=sbp.DEVNULL,
                               shell=True,
                               check=False)
            if finished.returncode != 0:
                print(time_stamp(),
                      '!!ERROR!! {}\n'.format(command),
                      flush=True)
                sys.exit(1)