Beispiel #1
0
 def run(self):
     """Run the RQ_PLOT R script and record the result in the task output.

     The script is called with the ``reads_quality_plot/`` directory under
     ``OutDir`` followed by ``SampleInf``. The value returned by
     ``run_cmd`` is written to ``self.output()``.
     """
     plot_dir = '{}/reads_quality_plot/'.format(OutDir)
     plot_cmd = ['Rscript', RQ_PLOT, plot_dir, SampleInf]
     cmd_result = run_cmd(plot_cmd)

     with self.output().open('w') as log_handle:
         log_handle.write(cmd_result)
Beispiel #2
0
    def run(self):
        """Run the three GO-annotation commands and log the result.

        The commands are handed to ``run_cmd`` together, as one list, in
        this order: the BIOMART_DOWNLOAD R script, the TOPGO_FORMAT python
        script and the GO_ANNO python script. The value returned by
        ``run_cmd`` is written to ``self.output()``.
        """
        # Every file name below is derived from annotation_dir/species_latin.
        gene_trans_map = '{0}/{1}.gene_trans_map.txt'.format(
            annotation_dir, species_latin)
        species_prefix = '{0}/{1}'.format(annotation_dir, species_latin)
        go_table = '{0}/{1}.go.txt'.format(annotation_dir, species_latin)
        go_detail = '{0}/{1}.go_detail.txt'.format(
            annotation_dir, species_latin)
        go_anno = '{0}/{1}.go_anno.txt'.format(annotation_dir, species_latin)

        biomart_step = [
            'Rscript', BIOMART_DOWNLOAD,
            '--gene_tr_file', gene_trans_map,
            '--output', species_prefix,
            '--species', '{}'.format(species_ensembl),
        ]
        topgo_step = [
            'python', TOPGO_FORMAT,
            '--biomart_go', go_table,
            '--out_dir', '{}'.format(annotation_dir),
        ]
        anno_step = ['python', GO_ANNO, go_table, go_detail, go_anno]

        steps_result = run_cmd([biomart_step, topgo_step, anno_step])

        with self.output().open('w') as log_handle:
            log_handle.write(steps_result)
Beispiel #3
0
    def run(self):
        """Create the ``logs`` and ``kallisto`` directories under ``OutDir``.

        Both directories are created with a single ``mkdir -p`` call; the
        value returned by ``run_cmd`` is written to ``self.output()``.
        """
        needed_dirs = [
            path.join(OutDir, 'logs'),
            path.join(OutDir, 'kallisto'),
        ]
        mkdir_result = run_cmd(['mkdir', '-p'] + needed_dirs)

        with self.output().open('w') as log_handle:
            log_handle.write(mkdir_result)
Beispiel #4
0
 def run(self):
     """Run the FASTQC_SUMMERY python script and log the result.

     Arguments passed to the script, in order: ``SampleInf``, ``OutDir``
     and the ``fastqc_general_stats`` path under ``OutDir``. The value
     returned by ``run_cmd`` is written to ``self.output()``.
     """
     script_args = [
         '{}'.format(SampleInf),
         '{}'.format(OutDir),
         '{}/fastqc_general_stats'.format(OutDir),
     ]
     summary_result = run_cmd(['python', FASTQC_SUMMERY] + script_args)

     with self.output().open('w') as log_handle:
         log_handle.write(summary_result)
Beispiel #5
0
    def run(self):
        """Run blastp for ``self.fasta_file`` against ``self.database``.

        The hit table is written to
        ``<self.blast_out_dir>/<basename of self.fasta_file>.blasttab``.
        blastp is called with ``-evalue 1e-5``, ``-max_target_seqs 1``,
        ``BLAST_THREAD`` threads and a custom ``-outfmt 6`` column list.
        The value returned by ``run_cmd`` is written to ``self.output()``.
        """
        # NOTE: an ORF-prediction step (TransDecoder.LongOrfs plus a symlink
        # of longest_orfs.pep) was once sketched here as commented-out code;
        # it is not part of this task.
        query_name = path.basename(self.fasta_file)
        hit_table = '{0}/{1}.blasttab'.format(self.blast_out_dir, query_name)
        column_spec = '6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore stitle'

        blastp_call = ['blastp']
        blastp_call += ['-query', self.fasta_file]
        blastp_call += ['-db', self.database]
        blastp_call += ['-evalue', '1e-5']
        blastp_call += ['-outfmt', column_spec]
        blastp_call += ['-max_target_seqs', '1']
        blastp_call += ['-num_threads', BLAST_THREAD]
        blastp_call += ['-out', hit_table]

        blast_result = run_cmd(blastp_call)

        with self.output().open('w') as log_handle:
            log_handle.write(blast_result)
Beispiel #6
0
    def run(self):
        """Run RNASeq-MATS.py for the two groups named in ``self.compare``.

        For each group, the entries in column ``1`` of
        ``group_sample_df.loc[<group>]`` are turned into
        ``<BamDir>/<sample>.bam`` paths and joined with commas for ``-b1`` /
        ``-b2``. Results go to ``<OutDir>/rmats/<group1>_vs_<group2>``. The
        value returned by ``run_cmd`` is written to ``self.output()``.
        """
        first_group, second_group = self.compare

        def bam_paths(group):
            # One '<BamDir>/<sample>.bam' entry per sample of this group.
            return [
                '{0}/{1}.bam'.format(BamDir, sample_name)
                for sample_name in group_sample_df.loc[group][1]
            ]

        first_bams = bam_paths(first_group)
        second_bams = bam_paths(second_group)
        compare_dir = path.join(
            OutDir, 'rmats', '{0}_vs_{1}'.format(first_group, second_group))

        rmats_call = ['RNASeq-MATS.py']
        rmats_call += ['-b1', '{}'.format(','.join(first_bams))]
        rmats_call += ['-b2', '{}'.format(','.join(second_bams))]
        rmats_call += ['-t', 'paired']
        rmats_call += ['-len', '150']
        rmats_call += ['-gtf', '{}'.format(Gtf)]
        rmats_call += ['-o', '{}'.format(compare_dir)]
        rmats_call += ['-c', '{}'.format(splicing_difference_cutoff)]
        rmats_call += ['-analysis', '{}'.format(analysis_type)]
        rmats_call += ['-libType', '{}'.format(library_type)]
        rmats_call += ['-novelSS', '{0}'.format(novel_splice)]

        rmats_result = run_cmd(rmats_call)

        with self.output().open('w') as log_handle:
            log_handle.write(rmats_result)
Beispiel #7
0
    def run(self):
        """Create ``logs`` and ``qc_data`` under ``self.OutDir``.

        Both directories are created with a single ``mkdir -p`` call; the
        value returned by ``run_cmd`` is written to ``self.output()``.
        """
        needed_dirs = [
            path.join(self.OutDir, 'logs'),
            path.join(self.OutDir, 'qc_data'),
        ]
        mkdir_result = run_cmd(['mkdir', '-p'] + needed_dirs)

        with self.output().open('w') as log_handle:
            log_handle.write(mkdir_result)
Beispiel #8
0
 def run(self):
     """Run read_distribution.py on this sample's BAM file.

     The input is ``<BamDir>/<self.sample>.bam`` and ``BedFile`` is passed
     to ``-r``. The value returned by ``run_cmd`` is written to
     ``self.output()``.
     """
     sample_bam = '{0}/{1}.bam'.format(BamDir, self.sample)
     distribution_cmd = [
         'read_distribution.py',
         '-i', sample_bam,
         '-r', BedFile,
     ]
     cmd_result = run_cmd(distribution_cmd)

     with self.output().open('w') as result_handle:
         result_handle.write(cmd_result)
Beispiel #9
0
 def run(self):
     """Create ``logs``, ``mapping_dir`` and ``bam_dir`` under ``OutDir``.

     All three directories are created with a single ``mkdir -p`` call;
     the value returned by ``run_cmd`` is written to ``self.output()``.
     """
     needed_dirs = [
         path.join(OutDir, 'logs'),
         path.join(OutDir, 'mapping_dir'),
         path.join(OutDir, 'bam_dir'),
     ]
     mkdir_result = run_cmd(['mkdir', '-p'] + needed_dirs)

     with self.output().open('w') as log_handle:
         log_handle.write(mkdir_result)
Beispiel #10
0
    def run(self):
        """Run differential analysis for ``self.compare`` and log the result.

        The DIFF_ANALYSIS R script always runs. When ``Anno`` is truthy, the
        QUANT_ANNO python script is queued after it on the same
        ``<OutDir>/differential_analysis/<self.compare>`` directory. All
        queued commands go to ``run_cmd`` in one call, and its return value
        is written to ``self.output()``.
        """
        compare_dir = '{0}/differential_analysis/{1}'.format(
            OutDir, self.compare)

        analysis_step = [
            'Rscript', DIFF_ANALYSIS,
            '--kallisto_dir', '{}/kallisto'.format(OutDir),
            '--tpm_table',
            '{}/expression_summary/Gene.tpm.txt'.format(OutDir),
            '--compare', self.compare,
            '--sample_inf', SampleInf,
            '--gene2tr', Gene2Tr,
            '--out_dir', compare_dir,
            '--qvalue', str(Qvalue),
            '--logfc', str(LogFC),
        ]
        steps = [analysis_step]

        # The annotation step is optional and only runs when Anno is set.
        if Anno:
            steps.append(['python', QUANT_ANNO, '-a', Anno, '-q', compare_dir])

        steps_result = run_cmd(steps)

        with self.output().open('w') as log_handle:
            log_handle.write(steps_result)
Beispiel #11
0
 def run(self):
     """Run infer_experiment.py on this sample's BAM file.

     The input is ``<BamDir>/<self.sample>.bam`` and ``BedFile`` is passed
     to ``-r``. The value returned by ``run_cmd`` is written to
     ``self.output()``.
     """
     sample_bam = '{0}/{1}.bam'.format(BamDir, self.sample)
     infer_cmd = [
         'infer_experiment.py',
         '-i', sample_bam,
         '-r', BedFile,
     ]
     cmd_result = run_cmd(infer_cmd)

     with self.output().open('w') as log_handle:
         log_handle.write(cmd_result)
Beispiel #12
0
 def run(self):
     """Run junction_saturation.py on this sample's BAM file.

     The input is ``<BamDir>/<self.sample>.bam``, ``BedFile`` is passed to
     ``-r`` and ``-o`` receives
     ``<OutDir>/junction_saturation/<self.sample>``. The value returned by
     ``run_cmd`` is written to ``self.output()``.
     """
     sample_bam = '{0}/{1}.bam'.format(BamDir, self.sample)
     out_prefix = '{0}/junction_saturation/{1}'.format(OutDir, self.sample)
     saturation_cmd = [
         'junction_saturation.py',
         '-i', sample_bam,
         '-r', BedFile,
         '-o', out_prefix,
     ]
     cmd_result = run_cmd(saturation_cmd)

     with self.output().open('w') as log_handle:
         log_handle.write(cmd_result)
Beispiel #13
0
    def run(self):
        """Create the ``logs`` and ``rmats`` directories under ``OutDir``.

        Both directories are created with a single ``mkdir -p`` call; the
        value returned by ``run_cmd`` is written to ``self.output()``.
        """
        needed_dirs = [
            path.join(OutDir, 'logs'),
            path.join(OutDir, 'rmats'),
        ]
        mkdir_result = run_cmd(['mkdir', '-p'] + needed_dirs)

        with self.output().open('w') as log_handle:
            log_handle.write(mkdir_result)
Beispiel #14
0
 def run(self):
     """Run geneBody_coverage.py on this sample's BAM file.

     The input is ``<BamDir>/<self.sample>.bam``, ``BedFile`` is passed to
     ``-r`` and ``-o`` receives
     ``<OutDir>/genebody_coverage/<self.sample>``. The value returned by
     ``run_cmd`` is written to ``self.output()``.
     """
     sample_bam = '{0}/{1}.bam'.format(BamDir, self.sample)
     out_prefix = '{0}/genebody_coverage/{1}'.format(OutDir, self.sample)
     coverage_cmd = [
         'geneBody_coverage.py',
         '-i', sample_bam,
         '-r', BedFile,
         '-o', out_prefix,
     ]
     cmd_result = run_cmd(coverage_cmd)

     with self.output().open('w') as log_handle:
         log_handle.write(cmd_result)
Beispiel #15
0
    def run(self):
        """Run the READ_DISTRIBUTION_PLOT_PREPARE script and log the result.

        The script receives ``SampleInf`` and the ``read_distribution/``
        directory under ``OutDir``. The value returned by ``run_cmd`` is
        written to ``self.output()``.
        """
        distribution_dir = '{0}/read_distribution/'.format(OutDir)
        prepare_cmd = [
            'python', READ_DISTRIBUTION_PLOT_PREPARE,
            SampleInf,
            distribution_dir,
        ]
        cmd_result = run_cmd(prepare_cmd)

        with self.output().open('w') as log_handle:
            log_handle.write(cmd_result)
Beispiel #16
0
 def run(self):
     """Run FASTQC on both clean read files of ``self.sample``.

     The inputs are ``<CleanDir>/<sample>_1.clean.fq.gz`` and
     ``<CleanDir>/<sample>_2.clean.fq.gz``. FASTQC is called with
     ``--extract`` and ``-o <OutDir>/fastqc_results``. The value returned
     by ``run_cmd`` is written to ``self.output()``.
     """
     read1 = '{0}/{1}_1.clean.fq.gz'.format(CleanDir, self.sample)
     read2 = '{0}/{1}_2.clean.fq.gz'.format(CleanDir, self.sample)
     results_dir = '{}/fastqc_results'.format(OutDir)

     fastqc_cmd = [FASTQC, read1, read2, '--extract', '-o', results_dir]
     cmd_result = run_cmd(fastqc_cmd)

     with self.output().open('w') as log_handle:
         log_handle.write(cmd_result)
Beispiel #17
0
 def run(self):
     """Run inner_distance.py on this sample's BAM file.

     The input is ``<BamDir>/<self.sample>.bam``, ``BedFile`` is passed to
     ``-r`` and ``-o`` receives ``<OutDir>/inner_distance/<self.sample>``.
     The value returned by ``run_cmd`` is written to ``self.output()``.
     """
     sample_bam = '{0}/{1}.bam'.format(BamDir, self.sample)
     out_prefix = '{0}/inner_distance/{1}'.format(OutDir, self.sample)
     distance_cmd = [
         'inner_distance.py',
         '-i', sample_bam,
         '-r', BedFile,
         '-o', out_prefix,
     ]
     cmd_result = run_cmd(distance_cmd)

     with self.output().open('w') as log_handle:
         log_handle.write(cmd_result)
Beispiel #18
0
 def run(self):
     """Run the GC_PLOT_R script on ``<OutDir>/gc_plot`` and log the result.

     The same ``gc_plot`` directory is passed as both ``--gc_dir`` and
     ``--out_dir``; ``SampleInf`` is passed as ``--sample_inf``. The value
     returned by ``run_cmd`` is written to ``self.output()``.
     """
     plot_dir = path.join(OutDir, 'gc_plot')
     plot_cmd = [
         'Rscript', GC_PLOT_R,
         '--gc_dir', plot_dir,
         '--out_dir', plot_dir,
         '--sample_inf', SampleInf,
     ]
     cmd_result = run_cmd(plot_cmd)

     with self.output().open('w') as log_handle:
         log_handle.write(cmd_result)
Beispiel #19
0
 def run(self):
     """Run read_duplication.py on this sample's BAM file.

     The input is ``<BamDir>/<self.sample>.bam`` and ``-o`` receives
     ``<OutDir>/read_duplication/<self.sample>``. The value returned by
     ``run_cmd`` is written to ``self.output()``.
     """
     sample_bam = '{0}/{1}.bam'.format(BamDir, self.sample)
     out_prefix = '{0}/read_duplication/{1}'.format(OutDir, self.sample)
     duplication_cmd = [
         'read_duplication.py',
         '-i', sample_bam,
         '-o', out_prefix,
     ]
     cmd_result = run_cmd(duplication_cmd)

     with self.output().open('w') as log_handle:
         log_handle.write(cmd_result)
Beispiel #20
0
    def run(self):
        """Create the ``logs`` and ``bam`` directories under ``self.OutDir``.

        Both directories are created with a single ``mkdir -p`` call; the
        value returned by ``run_cmd`` is written to ``self.output()``.
        """
        needed_dirs = [
            path.join(self.OutDir, 'logs'),
            path.join(self.OutDir, 'bam'),
        ]
        mkdir_result = run_cmd(['mkdir', '-p'] + needed_dirs)

        with self.output().open('w') as log_handle:
            log_handle.write(mkdir_result)
Beispiel #21
0
    def run(self):
        """Run the GO_ANALYSIS_R script and log the result.

        The script receives ``QuantDir``, ``GoseqAnno``, ``GeneLen``,
        ``TopgoAnno`` and ``OutDir`` through its ``--quant_dir``,
        ``--go_anno``, ``--gene_length``, ``--topgo_anno`` and ``--out_dir``
        options. The value returned by ``run_cmd`` is written to
        ``self.output()``.
        """
        analysis_cmd = [
            'Rscript', GO_ANALYSIS_R,
            '--quant_dir', QuantDir,
            '--go_anno', GoseqAnno,
            '--gene_length', GeneLen,
            '--topgo_anno', TopgoAnno,
            '--out_dir', OutDir,
        ]
        cmd_result = run_cmd(analysis_cmd)

        with self.output().open('w') as log_handle:
            log_handle.write(cmd_result)
Beispiel #22
0
    def run(self):
        """Create ``logs``, ``assembly_dir`` and ``annotation`` in ``OutDir``.

        All three directories are created with a single ``mkdir -p`` call;
        the value returned by ``run_cmd`` is written to ``self.output()``.
        """
        needed_dirs = [
            path.join(OutDir, 'logs'),
            path.join(OutDir, 'assembly_dir'),
            path.join(OutDir, 'annotation'),
        ]
        mkdir_result = run_cmd(['mkdir', '-p'] + needed_dirs)

        with self.output().open('w') as log_handle:
            log_handle.write(mkdir_result)
Beispiel #23
0
    def run(self):
        """Run the KALLISTO_TO_DIFF R script and log the result.

        The script reads ``<OutDir>/kallisto`` as ``--quant_dir`` and also
        receives ``SampleInf``, ``Gene2Tr`` and ``OutDir`` (as
        ``--out_dir``). The value returned by ``run_cmd`` is written to
        ``self.output()``.
        """
        kallisto_dir = '{}/kallisto'.format(OutDir)
        convert_cmd = [
            'Rscript', KALLISTO_TO_DIFF,
            '--quant_dir', kallisto_dir,
            '--sample_inf', SampleInf,
            '--gene2tr', Gene2Tr,
            '--out_dir', OutDir,
        ]
        cmd_result = run_cmd(convert_cmd)

        with self.output().open('w') as log_handle:
            log_handle.write(cmd_result)
Beispiel #24
0
    def run(self):
        """Run the TRANSCRIPT_FEATURE python script and log the result.

        The script receives ``ref_gtf``, ``species_latin`` and
        ``annotation_dir`` through ``--gtf``, ``--species`` and
        ``--out_dir``. The value returned by ``run_cmd`` is written to
        ``self.output()``.
        """
        feature_cmd = [
            'python', TRANSCRIPT_FEATURE,
            '--gtf', ref_gtf,
            '--species', species_latin,
            '--out_dir', annotation_dir,
        ]
        cmd_result = run_cmd(feature_cmd)

        with self.output().open('w') as log_handle:
            log_handle.write(cmd_result)
Beispiel #25
0
    def run(self):
        """Run ENRICH_BARPLOT_R in ``go`` mode for ``self.compare``.

        ``<OutDir>/go/<self.compare>`` is passed as both ``--table`` and
        ``--out``; ``--diff`` is
        ``<QuantDir>/differential_analysis/<self.compare>`` and ``--anno``
        is ``GoseqAnno``. The value returned by ``run_cmd`` is written to
        ``self.output()``.
        """
        go_dir = '{0}/go/{1}'.format(OutDir, self.compare)
        diff_dir = '{0}/differential_analysis/{1}'.format(
            QuantDir, self.compare)

        barplot_cmd = [
            'Rscript', ENRICH_BARPLOT_R,
            '--anno', GoseqAnno,
            '--table', go_dir,
            '--diff', diff_dir,
            '--type', 'go',
            '--out', go_dir,
        ]
        cmd_result = run_cmd(barplot_cmd)

        with self.output().open('w') as log_handle:
            log_handle.write(cmd_result)
Beispiel #26
0
    def run(self):
        """Run the KEGG_ANALYSIS_PYTHON script and log the result.

        The script receives ``KEGGBlast``, ``KEGGAbbr`` and
        ``KEGGBackground``, reads ``<QuantDir>/differential_analysis/`` as
        ``--diff_dir`` and uses ``<OutDir>/kegg`` as ``--out_dir``. The
        value returned by ``run_cmd`` is written to ``self.output()``.
        """
        diff_dir = '{}/differential_analysis/'.format(QuantDir)
        kegg_dir = '{}/kegg'.format(OutDir)

        kegg_cmd = [
            'python', KEGG_ANALYSIS_PYTHON,
            '--blast_out', KEGGBlast,
            '--species', KEGGAbbr,
            '--background', KEGGBackground,
            '--diff_dir', diff_dir,
            '--out_dir', kegg_dir,
        ]
        cmd_result = run_cmd(kegg_cmd)

        with self.output().open('w') as log_handle:
            log_handle.write(cmd_result)
Beispiel #27
0
    def run(self):
        """Run tin.py for ``self.sample`` from inside ``<OutDir>/tin``.

        tin.py is called with only ``-i <BamDir>/<self.sample>.bam`` and
        ``-r BedFile`` (no output option), so the task first changes into
        the ``tin`` directory. NOTE(review): presumably tin.py writes its
        results into the current working directory -- confirm.

        The previous working directory is restored once the task finishes
        or fails, so the directory change does not leak into other code
        running in the same process. The value returned by ``run_cmd`` is
        written to ``self.output()`` while still inside the ``tin``
        directory, exactly as before.
        """
        import os  # local import: only os.getcwd() is needed in addition

        tin_dir = path.join(OutDir, 'tin')
        previous_dir = os.getcwd()
        chdir(tin_dir)
        try:
            tin_inf = run_cmd([
                'tin.py', '-i', '{0}/{1}.bam'.format(BamDir, self.sample),
                '-r', BedFile
            ])

            with self.output().open('w') as tin_log:
                tin_log.write(tin_inf)
        finally:
            # Always undo the chdir, even when tin.py or the write fails.
            chdir(previous_dir)
Beispiel #28
0
    def run(self):
        """Run ENRICH_BARPLOT_R in ``kegg`` mode for ``self.compare``.

        ``<OutDir>/kegg/<self.compare>`` is passed as both ``--table`` and
        ``--out``; ``--diff`` is
        ``<QuantDir>/differential_analysis/<self.compare>`` and ``--anno``
        is ``KEGGBlast``. The value returned by ``run_cmd`` is written to
        ``self.output()``.
        """
        kegg_dir = '{0}/kegg/{1}'.format(OutDir, self.compare)
        diff_dir = '{0}/differential_analysis/{1}'.format(
            QuantDir, self.compare)

        barplot_cmd = [
            'Rscript', ENRICH_BARPLOT_R,
            '--anno', KEGGBlast,
            '--table', kegg_dir,
            '--diff', diff_dir,
            '--type', 'kegg',
            '--out', kegg_dir,
        ]
        cmd_result = run_cmd(barplot_cmd)

        with self.output().open('w') as log_handle:
            log_handle.write(cmd_result)
Beispiel #29
0
    def run(self):
        """Run GATK VariantFiltration on ``<OutDir>/snp.raw.vcf``.

        The jar at ``GATK_PATH`` is started through ``java -jar`` with
        ``-window 35``, ``-cluster 3`` and two named filters (``FS`` and
        ``QD``), using ``Ref`` as ``-R``. The filtered calls are written to
        ``<OutDir>/snp.filter.vcf``. The value returned by ``run_cmd`` is
        written to ``self.output()``.
        """
        raw_vcf = '{}/snp.raw.vcf'.format(OutDir)
        filtered_vcf = '{}/snp.filter.vcf'.format(OutDir)

        gatk_call = ['java', '-jar', '{}'.format(GATK_PATH)]
        gatk_call += ['-T', 'VariantFiltration']
        gatk_call += ['-window', '35']
        gatk_call += ['-cluster', '3']
        # Each -filterName labels the -filter expression that follows it.
        gatk_call += ['-filterName', 'FS', '-filter', 'FS > 30.0']
        gatk_call += ['-filterName', 'QD', '-filter', 'QD < 2.0']
        gatk_call += ['-R', '{}'.format(Ref)]
        gatk_call += ['-V', raw_vcf]
        gatk_call += ['-o', filtered_vcf]

        cmd_result = run_cmd(gatk_call)

        with self.output().open('w') as log_handle:
            log_handle.write(cmd_result)
Beispiel #30
0
    def run(self):
        """Run the QUANT_REPORT R script and log the result.

        The script receives ``OutDir`` as ``--quant_dir`` and ``SampleInf``
        as ``--sample_inf``. The value returned by ``run_cmd`` is written to
        ``self.output()``.
        """
        cmd_result = run_cmd([
            'Rscript', QUANT_REPORT,
            '--quant_dir', OutDir,
            '--sample_inf', SampleInf,
        ])

        with self.output().open('w') as log_handle:
            log_handle.write(cmd_result)