def run(self):
    """Run the reads-quality plotting R script and record its log.

    Invokes ``Rscript RQ_PLOT`` with the ``reads_quality_plot/`` directory
    under ``OutDir`` and ``SampleInf`` as positional arguments, then writes
    whatever ``run_cmd`` returns to the target given by ``self.output()``.
    """
    plot_dir = '{}/reads_quality_plot/'.format(OutDir)
    plot_cmd = ['Rscript', RQ_PLOT, plot_dir, SampleInf]
    cmd_log = run_cmd(plot_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(cmd_log)
def run(self):
    """Build and run the three GO-annotation commands, then log the result.

    The commands are handed to ``run_cmd`` as one list, in this order:

    1. ``Rscript BIOMART_DOWNLOAD`` with the gene/transcript map, an output
       prefix and the Ensembl species name.
    2. ``python TOPGO_FORMAT`` on the ``.go.txt`` table.
    3. ``python GO_ANNO`` on the ``.go.txt`` and ``.go_detail.txt`` tables,
       with ``.go_anno.txt`` as the third path argument.

    The value returned by ``run_cmd`` is written to ``self.output()``.
    """
    # Path shared by the second and third commands.
    go_table = '{0}/{1}.go.txt'.format(annotation_dir, species_latin)

    fetch_go = [
        'Rscript', BIOMART_DOWNLOAD,
        '--gene_tr_file',
        '{0}/{1}.gene_trans_map.txt'.format(annotation_dir, species_latin),
        '--output',
        '{0}/{1}'.format(annotation_dir, species_latin),
        '--species',
        '{}'.format(species_ensembl),
    ]
    format_topgo = [
        'python', TOPGO_FORMAT,
        '--biomart_go', go_table,
        '--out_dir', '{}'.format(annotation_dir),
    ]
    annotate_genes = [
        'python', GO_ANNO,
        go_table,
        '{0}/{1}.go_detail.txt'.format(annotation_dir, species_latin),
        '{0}/{1}.go_anno.txt'.format(annotation_dir, species_latin),
    ]

    pipeline_log = run_cmd([fetch_go, format_topgo, annotate_genes])
    with self.output().open('w') as log_handle:
        log_handle.write(pipeline_log)
def run(self):
    """Create the ``logs`` and ``kallisto`` directories under ``OutDir``.

    Runs ``mkdir -p`` on both paths through ``run_cmd`` and writes the
    returned value to the target given by ``self.output()``.
    """
    wanted_dirs = [
        path.join(OutDir, 'logs'),
        path.join(OutDir, 'kallisto'),
    ]
    mkdir_log = run_cmd(['mkdir', '-p'] + wanted_dirs)
    with self.output().open('w') as log_handle:
        log_handle.write(mkdir_log)
def run(self):
    """Run the FastQC summary script and record its log.

    Calls ``python FASTQC_SUMMERY`` with three positional arguments:
    ``SampleInf``, ``OutDir`` and ``<OutDir>/fastqc_general_stats``. The
    value returned by ``run_cmd`` is written to ``self.output()``.
    """
    summary_cmd = [
        'python',
        FASTQC_SUMMERY,
        '{}'.format(SampleInf),
        '{}'.format(OutDir),
        '{}/fastqc_general_stats'.format(OutDir),
    ]
    summary_log = run_cmd(summary_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(summary_log)
def run(self):
    """Run ``blastp`` for ``self.fasta_file`` against ``self.database``.

    The tabular result is written to
    ``<self.blast_out_dir>/<basename of fasta_file>.blasttab``; the value
    returned by ``run_cmd`` is written to the target from ``self.output()``.
    """
    query_name = path.basename(self.fasta_file)

    # Disabled steps kept for reference (ORF prediction and symlink):
    # orf_pred_cmd = ['TransDecoder.LongOrfs',
    #                 '-t',
    #                 '{}/stringtie_merge.fa'.format(OutDir),
    #                 '--gene_trans_map',
    #                 SampleInf]
    # link_orf_cmd = ['ln',
    #                 '-s',
    #                 '{}/stringtie_merge.fa.transdecoder_dir/longest_orfs.pep'.format(
    #                     OutDir),
    #                 '{}/annotation']

    # Output columns requested from blastp (tabular format 6 plus fields).
    out_format = ('6 qseqid sseqid pident length mismatch gapopen qstart '
                  'qend sstart send evalue bitscore stitle')
    result_file = '{0}/{1}.blasttab'.format(self.blast_out_dir, query_name)

    blastp_args = [
        'blastp',
        '-query', self.fasta_file,
        '-db', self.database,
        '-evalue', '1e-5',
        '-outfmt', out_format,
        '-max_target_seqs', '1',
        '-num_threads', BLAST_THREAD,
        '-out', result_file,
    ]
    blastp_log = run_cmd(blastp_args)
    with self.output().open('w') as log_handle:
        log_handle.write(blastp_log)
def run(self):
    """Run ``RNASeq-MATS.py`` for the two groups in ``self.compare``.

    BAM paths for each group are built from the sample names found in
    column ``1`` of ``group_sample_df`` at that group's label. Results go
    to ``<OutDir>/rmats/<group1>_vs_<group2>``; the value returned by
    ``run_cmd`` is written to the target from ``self.output()``.
    """
    first_group, second_group = self.compare

    def bam_paths(group_name):
        # One '<BamDir>/<sample>.bam' entry per sample listed for the group.
        return [
            '{0}/{1}.bam'.format(BamDir, sample_id)
            for sample_id in group_sample_df.loc[group_name][1]
        ]

    first_bams = bam_paths(first_group)
    second_bams = bam_paths(second_group)
    result_dir = path.join(
        OutDir, 'rmats', '{0}_vs_{1}'.format(first_group, second_group))

    rmats_args = [
        'RNASeq-MATS.py',
        '-b1', '{}'.format(','.join(first_bams)),
        '-b2', '{}'.format(','.join(second_bams)),
        '-t', 'paired',
        '-len', '150',
        '-gtf', '{}'.format(Gtf),
        '-o', '{}'.format(result_dir),
        '-c', '{}'.format(splicing_difference_cutoff),
        '-analysis', '{}'.format(analysis_type),
        '-libType', '{}'.format(library_type),
        '-novelSS', '{0}'.format(novel_splice),
    ]
    rmats_log = run_cmd(rmats_args)
    with self.output().open('w') as log_handle:
        log_handle.write(rmats_log)
def run(self):
    """Create the ``logs`` and ``qc_data`` directories under ``self.OutDir``.

    Runs ``mkdir -p`` on both paths through ``run_cmd`` and writes the
    returned value to the target given by ``self.output()``.
    """
    wanted_dirs = [
        path.join(self.OutDir, 'logs'),
        path.join(self.OutDir, 'qc_data'),
    ]
    mkdir_log = run_cmd(['mkdir', '-p'] + wanted_dirs)
    with self.output().open('w') as log_handle:
        log_handle.write(mkdir_log)
def run(self):
    """Run ``read_distribution.py`` on this sample's BAM file.

    The BAM path is ``<BamDir>/<self.sample>.bam`` and ``BedFile`` is passed
    via ``-r``. The value returned by ``run_cmd`` is written to the target
    from ``self.output()``.
    """
    sample_bam = '{0}/{1}.bam'.format(BamDir, self.sample)
    distribution_cmd = ['read_distribution.py', '-i', sample_bam, '-r', BedFile]
    distribution_log = run_cmd(distribution_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(distribution_log)
def run(self):
    """Create ``logs``, ``mapping_dir`` and ``bam_dir`` under ``OutDir``.

    Runs ``mkdir -p`` on the three paths through ``run_cmd`` and writes the
    returned value to the target given by ``self.output()``.
    """
    wanted_dirs = [
        path.join(OutDir, 'logs'),
        path.join(OutDir, 'mapping_dir'),
        path.join(OutDir, 'bam_dir'),
    ]
    mkdir_log = run_cmd(['mkdir', '-p'] + wanted_dirs)
    with self.output().open('w') as log_handle:
        log_handle.write(mkdir_log)
def run(self):
    """Run differential analysis for ``self.compare`` and log the result.

    Always runs ``Rscript DIFF_ANALYSIS``; when ``Anno`` is truthy a second
    command, ``python QUANT_ANNO``, is appended for the same comparison
    directory. Both are passed to ``run_cmd`` as one list of commands and
    the returned value is written to the target from ``self.output()``.
    """
    # Directory used by both commands for this comparison.
    compare_dir = '{0}/differential_analysis/{1}'.format(OutDir, self.compare)

    analysis_cmd = [
        'Rscript', DIFF_ANALYSIS,
        '--kallisto_dir', '{}/kallisto'.format(OutDir),
        '--tpm_table', '{}/expression_summary/Gene.tpm.txt'.format(OutDir),
        '--compare', self.compare,
        '--sample_inf', SampleInf,
        '--gene2tr', Gene2Tr,
        '--out_dir', compare_dir,
        '--qvalue', str(Qvalue),
        '--logfc', str(LogFC),
    ]
    commands = [analysis_cmd]

    if Anno:
        commands.append(
            ['python', QUANT_ANNO, '-a', Anno, '-q', compare_dir])

    commands_log = run_cmd(commands)
    with self.output().open('w') as log_handle:
        log_handle.write(commands_log)
def run(self):
    """Run ``infer_experiment.py`` on this sample's BAM file.

    The BAM path is ``<BamDir>/<self.sample>.bam`` and ``BedFile`` is passed
    via ``-r``. The value returned by ``run_cmd`` is written to the target
    from ``self.output()``.
    """
    sample_bam = '{0}/{1}.bam'.format(BamDir, self.sample)
    infer_cmd = ['infer_experiment.py', '-i', sample_bam, '-r', BedFile]
    infer_log = run_cmd(infer_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(infer_log)
def run(self):
    """Run ``junction_saturation.py`` on this sample's BAM file.

    Input is ``<BamDir>/<self.sample>.bam`` with ``BedFile`` as ``-r``; the
    ``-o`` prefix is ``<OutDir>/junction_saturation/<self.sample>``. The
    value returned by ``run_cmd`` is written to ``self.output()``.
    """
    sample_bam = '{0}/{1}.bam'.format(BamDir, self.sample)
    out_prefix = '{0}/junction_saturation/{1}'.format(OutDir, self.sample)
    saturation_cmd = [
        'junction_saturation.py',
        '-i', sample_bam,
        '-r', BedFile,
        '-o', out_prefix,
    ]
    saturation_log = run_cmd(saturation_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(saturation_log)
def run(self):
    """Create the ``logs`` and ``rmats`` directories under ``OutDir``.

    Runs ``mkdir -p`` on both paths through ``run_cmd`` and writes the
    returned value to the target given by ``self.output()``.
    """
    wanted_dirs = [
        path.join(OutDir, 'logs'),
        path.join(OutDir, 'rmats'),
    ]
    mkdir_log = run_cmd(['mkdir', '-p'] + wanted_dirs)
    with self.output().open('w') as log_handle:
        log_handle.write(mkdir_log)
def run(self):
    """Run ``geneBody_coverage.py`` on this sample's BAM file.

    Input is ``<BamDir>/<self.sample>.bam`` with ``BedFile`` as ``-r``; the
    ``-o`` prefix is ``<OutDir>/genebody_coverage/<self.sample>``. The value
    returned by ``run_cmd`` is written to ``self.output()``.
    """
    sample_bam = '{0}/{1}.bam'.format(BamDir, self.sample)
    out_prefix = '{0}/genebody_coverage/{1}'.format(OutDir, self.sample)
    coverage_cmd = [
        'geneBody_coverage.py',
        '-i', sample_bam,
        '-r', BedFile,
        '-o', out_prefix,
    ]
    coverage_log = run_cmd(coverage_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(coverage_log)
def run(self):
    """Run the read-distribution plot preparation script and log it.

    Calls ``python READ_DISTRIBUTION_PLOT_PREPARE`` with ``SampleInf`` and
    ``<OutDir>/read_distribution/`` as positional arguments. The value
    returned by ``run_cmd`` is written to ``self.output()``.
    """
    distribution_dir = '{0}/read_distribution/'.format(OutDir)
    prepare_cmd = [
        'python',
        READ_DISTRIBUTION_PLOT_PREPARE,
        SampleInf,
        distribution_dir,
    ]
    prepare_log = run_cmd(prepare_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(prepare_log)
def run(self):
    """Run the ``FASTQC`` executable on this sample's two clean read files.

    Inputs are ``<CleanDir>/<self.sample>_1.clean.fq.gz`` and
    ``<CleanDir>/<self.sample>_2.clean.fq.gz``; ``--extract`` is set and
    ``-o`` points at ``<OutDir>/fastqc_results``. The value returned by
    ``run_cmd`` is written to ``self.output()``.
    """
    read1 = '{0}/{1}_1.clean.fq.gz'.format(CleanDir, self.sample)
    read2 = '{0}/{1}_2.clean.fq.gz'.format(CleanDir, self.sample)
    results_dir = '{}/fastqc_results'.format(OutDir)

    fastqc_cmd = [FASTQC, read1, read2, '--extract', '-o', results_dir]
    fastqc_log = run_cmd(fastqc_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(fastqc_log)
def run(self):
    """Run ``inner_distance.py`` on this sample's BAM file.

    Input is ``<BamDir>/<self.sample>.bam`` with ``BedFile`` as ``-r``; the
    ``-o`` prefix is ``<OutDir>/inner_distance/<self.sample>``. The value
    returned by ``run_cmd`` is written to ``self.output()``.
    """
    sample_bam = '{0}/{1}.bam'.format(BamDir, self.sample)
    out_prefix = '{0}/inner_distance/{1}'.format(OutDir, self.sample)
    distance_cmd = [
        'inner_distance.py',
        '-i', sample_bam,
        '-r', BedFile,
        '-o', out_prefix,
    ]
    distance_log = run_cmd(distance_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(distance_log)
def run(self):
    """Run the GC-content plotting R script and record its log.

    ``<OutDir>/gc_plot`` is passed as both ``--gc_dir`` and ``--out_dir``,
    and ``SampleInf`` as ``--sample_inf``. The value returned by ``run_cmd``
    is written to the target from ``self.output()``.
    """
    plot_dir = path.join(OutDir, 'gc_plot')
    plot_cmd = [
        'Rscript', GC_PLOT_R,
        '--gc_dir', plot_dir,
        '--out_dir', plot_dir,
        '--sample_inf', SampleInf,
    ]
    plot_log = run_cmd(plot_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(plot_log)
def run(self):
    """Run ``read_duplication.py`` on this sample's BAM file.

    Input is ``<BamDir>/<self.sample>.bam``; the ``-o`` prefix is
    ``<OutDir>/read_duplication/<self.sample>``. The value returned by
    ``run_cmd`` is written to the target from ``self.output()``.
    """
    sample_bam = '{0}/{1}.bam'.format(BamDir, self.sample)
    out_prefix = '{0}/read_duplication/{1}'.format(OutDir, self.sample)
    duplication_cmd = ['read_duplication.py', '-i', sample_bam, '-o', out_prefix]
    duplication_log = run_cmd(duplication_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(duplication_log)
def run(self):
    """Create the ``logs`` and ``bam`` directories under ``self.OutDir``.

    Runs ``mkdir -p`` on both paths through ``run_cmd`` and writes the
    returned value to the target given by ``self.output()``.
    """
    wanted_dirs = [
        path.join(self.OutDir, 'logs'),
        path.join(self.OutDir, 'bam'),
    ]
    mkdir_log = run_cmd(['mkdir', '-p'] + wanted_dirs)
    with self.output().open('w') as log_handle:
        log_handle.write(mkdir_log)
def run(self):
    """Run the GO analysis R script and record its log.

    Passes ``QuantDir``, ``GoseqAnno``, ``GeneLen``, ``TopgoAnno`` and
    ``OutDir`` to ``Rscript GO_ANALYSIS_R`` through the matching options.
    The value returned by ``run_cmd`` is written to ``self.output()``.
    """
    go_cmd = [
        'Rscript', GO_ANALYSIS_R,
        '--quant_dir', QuantDir,
        '--go_anno', GoseqAnno,
        '--gene_length', GeneLen,
        '--topgo_anno', TopgoAnno,
        '--out_dir', OutDir,
    ]
    go_log = run_cmd(go_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(go_log)
def run(self):
    """Create ``logs``, ``assembly_dir`` and ``annotation`` under ``OutDir``.

    Runs ``mkdir -p`` on the three paths through ``run_cmd`` and writes the
    returned value to the target given by ``self.output()``.
    """
    wanted_dirs = [
        path.join(OutDir, 'logs'),
        path.join(OutDir, 'assembly_dir'),
        path.join(OutDir, 'annotation'),
    ]
    mkdir_log = run_cmd(['mkdir', '-p'] + wanted_dirs)
    with self.output().open('w') as log_handle:
        log_handle.write(mkdir_log)
def run(self):
    """Run the ``KALLISTO_TO_DIFF`` R script and record its log.

    ``<OutDir>/kallisto`` is passed as ``--quant_dir``, together with
    ``SampleInf``, ``Gene2Tr`` and ``OutDir`` as ``--out_dir``. The value
    returned by ``run_cmd`` is written to the target from ``self.output()``.
    """
    quant_dir = '{}/kallisto'.format(OutDir)
    convert_cmd = [
        'Rscript', KALLISTO_TO_DIFF,
        '--quant_dir', quant_dir,
        '--sample_inf', SampleInf,
        '--gene2tr', Gene2Tr,
        '--out_dir', OutDir,
    ]
    convert_log = run_cmd(convert_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(convert_log)
def run(self):
    """Run the transcript-feature script and record its log.

    Calls ``python TRANSCRIPT_FEATURE`` with ``ref_gtf``, ``species_latin``
    and ``annotation_dir`` via ``--gtf``, ``--species`` and ``--out_dir``.
    The value returned by ``run_cmd`` is written to ``self.output()``.
    """
    feature_cmd = ['python', TRANSCRIPT_FEATURE]
    feature_cmd += ['--gtf', ref_gtf]
    feature_cmd += ['--species', species_latin]
    feature_cmd += ['--out_dir', annotation_dir]

    feature_log = run_cmd(feature_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(feature_log)
def run(self):
    """Run the enrichment bar-plot R script in ``go`` mode.

    ``<OutDir>/go/<self.compare>`` is used as both ``--table`` and
    ``--out``; ``--diff`` points at
    ``<QuantDir>/differential_analysis/<self.compare>`` and ``--anno`` is
    ``GoseqAnno``. The value returned by ``run_cmd`` is written to the
    target from ``self.output()``.
    """
    go_dir = '{0}/go/{1}'.format(OutDir, self.compare)
    diff_dir = '{0}/differential_analysis/{1}'.format(QuantDir, self.compare)
    barplot_cmd = [
        'Rscript', ENRICH_BARPLOT_R,
        '--anno', GoseqAnno,
        '--table', go_dir,
        '--diff', diff_dir,
        '--type', 'go',
        '--out', go_dir,
    ]
    barplot_log = run_cmd(barplot_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(barplot_log)
def run(self):
    """Run the KEGG analysis Python script and record its log.

    Passes ``KEGGBlast``, ``KEGGAbbr`` and ``KEGGBackground`` plus the
    ``<QuantDir>/differential_analysis/`` input directory and the
    ``<OutDir>/kegg`` output directory. The value returned by ``run_cmd``
    is written to the target from ``self.output()``.
    """
    diff_root = '{}/differential_analysis/'.format(QuantDir)
    kegg_dir = '{}/kegg'.format(OutDir)
    kegg_cmd = [
        'python', KEGG_ANALYSIS_PYTHON,
        '--blast_out', KEGGBlast,
        '--species', KEGGAbbr,
        '--background', KEGGBackground,
        '--diff_dir', diff_root,
        '--out_dir', kegg_dir,
    ]
    kegg_log = run_cmd(kegg_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(kegg_log)
def run(self):
    """Run ``tin.py`` on this sample's BAM file from inside ``<OutDir>/tin``.

    The working directory is switched to ``<OutDir>/tin`` before the
    command runs. The value returned by ``run_cmd`` is written to the
    target from ``self.output()``.
    """
    # NOTE(review): the directory change is never undone in this method;
    # if chdir is os.chdir it affects the whole process. Confirm no later
    # step relies on relative paths. Presumably done so tin.py writes its
    # result files into the tin directory -- verify.
    chdir(path.join(OutDir, 'tin'))

    sample_bam = '{0}/{1}.bam'.format(BamDir, self.sample)
    tin_cmd = ['tin.py', '-i', sample_bam, '-r', BedFile]
    tin_output = run_cmd(tin_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(tin_output)
def run(self):
    """Run the enrichment bar-plot R script in ``kegg`` mode.

    ``<OutDir>/kegg/<self.compare>`` is used as both ``--table`` and
    ``--out``; ``--diff`` points at
    ``<QuantDir>/differential_analysis/<self.compare>`` and ``--anno`` is
    ``KEGGBlast``. The value returned by ``run_cmd`` is written to the
    target from ``self.output()``.
    """
    kegg_dir = '{0}/kegg/{1}'.format(OutDir, self.compare)
    diff_dir = '{0}/differential_analysis/{1}'.format(QuantDir, self.compare)
    barplot_cmd = [
        'Rscript', ENRICH_BARPLOT_R,
        '--anno', KEGGBlast,
        '--table', kegg_dir,
        '--diff', diff_dir,
        '--type', 'kegg',
        '--out', kegg_dir,
    ]
    barplot_log = run_cmd(barplot_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(barplot_log)
def run(self):
    """Run GATK ``VariantFiltration`` on the raw SNP calls and log it.

    Reads ``<OutDir>/snp.raw.vcf`` and writes ``<OutDir>/snp.filter.vcf``
    against reference ``Ref``, with two named filters (``FS > 30.0`` and
    ``QD < 2.0``) and ``-window 35 -cluster 3``. The value returned by
    ``run_cmd`` is written to the target from ``self.output()``.
    """
    launcher = ['java', '-jar', '{}'.format(GATK_PATH), '-T', 'VariantFiltration']
    clustering = ['-window', '35', '-cluster', '3']
    # Each -filterName is followed by the -filter expression it labels.
    filters = [
        '-filterName', 'FS', '-filter', 'FS > 30.0',
        '-filterName', 'QD', '-filter', 'QD < 2.0',
    ]
    files = [
        '-R', '{}'.format(Ref),
        '-V', '{}/snp.raw.vcf'.format(OutDir),
        '-o', '{}/snp.filter.vcf'.format(OutDir),
    ]

    filter_log = run_cmd(launcher + clustering + filters + files)
    with self.output().open('w') as log_handle:
        log_handle.write(filter_log)
def run(self):
    """Run the quantification report R script and record its log.

    Calls ``Rscript QUANT_REPORT`` with ``OutDir`` as ``--quant_dir`` and
    ``SampleInf`` as ``--sample_inf``. The value returned by ``run_cmd`` is
    written to the target from ``self.output()``.
    """
    report_cmd = [
        'Rscript', QUANT_REPORT,
        '--quant_dir', OutDir,
        '--sample_inf', SampleInf,
    ]
    report_output = run_cmd(report_cmd)
    with self.output().open('w') as log_handle:
        log_handle.write(report_output)