def run_kegg_pathview(self):
    """Run the pathview script and its check script for every DE result table.

    Every entry of ``self.diff_dir`` is treated as one comparison.  For each
    ``*.edgeR.DE_results.txt`` file found in it, a pathway output directory
    is created under ``self.out_dir/<comparison>`` and two commands are
    executed: the pathview command and the pathview check command, whose log
    goes to ``self.out_dir/kegg_pathway_logs``.

    Returns:
        list: every command string that was executed, in execution order.
    """
    commands = []
    log_dir = path.join(self.out_dir, 'kegg_pathway_logs')
    python_tools.circ_mkdir_unix(log_dir)
    for compare_name in listdir(self.diff_dir):
        compare_diff_dir = path.join(self.diff_dir, compare_name)
        compare_out_dir = path.join(self.out_dir, compare_name)
        pattern = '{}/*.edgeR.DE_results.txt'.format(compare_diff_dir)
        for diff_file in glob(pattern):
            prefix = path.basename(diff_file).split('.edgeR.DE_results')[0]
            # Tables whose prefix does not contain 'UP' get an '.ALL' suffix.
            if 'UP' not in prefix:
                prefix = '{}.ALL'.format(prefix)
            kegg_table = path.join(
                compare_out_dir, '%s.kegg.enrichment.txt' % (prefix))
            pathway_dir = path.join(compare_out_dir, '%s.pathway' % prefix)
            check_log = path.join(log_dir, '%s.log' % (prefix))
            pathview_cmd = 'python %s --kegg_table %s --blast_out %s --species %s --diff_out %s --out_dir %s' % (
                PATHVIEW, kegg_table, self.all_blast_out, self.species,
                diff_file, pathway_dir)
            check_cmd = 'python %s --kegg_table %s --pathway_dir %s --log_file %s' % (
                PATHVIEW_CK, kegg_table, pathway_dir, check_log)
            python_tools.circ_mkdir_unix(pathway_dir)
            # Plot first, then verify the plots; record both commands.
            for each_cmd in (pathview_cmd, check_cmd):
                python_tools.circ_call_process(each_cmd)
                commands.append(each_cmd)
    return commands
def KEGG_enrich(self):
    """Build the ``run_kobas.py`` command line and optionally execute it.

    When ``self.database`` is ``'NCBI'`` the gene list ``self.diff_list`` is
    passed as NCBI gene ids; otherwise the blast table
    ``self.compare_blast_out`` is passed as tabular blast output.

    Returns:
        str: the command line.  It is executed only when ``self.auot_run``
        is true.
    """
    if self.database == 'NCBI':
        template = 'run_kobas.py -i {self.diff_list} -t id:ncbigene -s {self.species} -d K -o {self.output}'
    else:
        template = 'run_kobas.py -i {self.compare_blast_out} -t blastout:tab -s {self.species} -d K -o {self.output}'
    cmd = template.format(self=self)
    if self.auot_run:
        python_tools.circ_call_process(cmd)
    return cmd
def get_blast_out(self):
    """Build the BLAST command for ``self.seq`` and optionally execute it.

    Sets ``self.ko_seq`` to the ``<species>.pep.fasta`` file inside
    ``KO_PEP_DIR``, calls ``self.check_blast_database()``, and uses the
    program name returned by ``self.check_blast_program()``.  Results are
    written to ``self.all_blast_out``.

    Returns:
        str: the command line.  It is executed only when ``self.auot_run``
        is true.
    """
    bin_dir = BLAST_BIN
    self.ko_seq = os.path.join(KO_PEP_DIR, '%s.pep.fasta' % self.species)
    # ko_seq is assigned before this check, as in the original call order.
    self.check_blast_database()
    program = self.check_blast_program()
    template = '{blast_bin}/{blast_pr} -query {self.seq} -db {self.ko_seq} -evalue 1e-5 -outfmt 6 -max_target_seqs 1 -num_threads 1 -out {self.all_blast_out}'
    cmd = template.format(blast_bin=bin_dir, blast_pr=program, self=self)
    if self.auot_run:
        python_tools.circ_call_process(cmd)
    return cmd
def kallisto_quant(self):
    """Run ``kallisto quant`` on ``self.fq_list`` and log the command used.

    With more than one fastq file the files are passed as-is; with a single
    file the command is run with ``--single`` using ``self.fq_length`` (-l),
    ``self.sd`` (-s) and ``self.thread`` (-t).  The executed command is
    written to ``quant.cmd.log`` inside ``self.out_dir``.

    Returns:
        str: the command line that was executed.
    """
    fq_cmd = ' '.join(self.fq_list)
    if len(self.fq_list) > 1:
        cmd = '%s quant -i %s -o %s %s' % (
            self.kallisto, self.index, self.out_dir, fq_cmd)
    else:
        # The trailing space in the template is kept so the logged command
        # stays identical to what earlier runs recorded.
        cmd = '%s quant -i %s -o %s --single -l %s -s %s --plaintext -t %s %s ' % (
            self.kallisto, self.index, self.out_dir, self.fq_length,
            self.sd, self.thread, fq_cmd)
    python_tools.circ_call_process(cmd)
    quant_log = os.path.join(self.out_dir, 'quant.cmd.log')
    python_tools.write_obj_to_file(cmd, quant_log)
    return cmd
def treat_KEGG_table(self, kegg_output):
    """Rewrite a KOBAS output table in place, keeping only the result rows.

    The file is first moved to ``tmp.<name>`` in the same directory.  If
    ``self.check_KOBAS_out`` accepts the temporary file, ``kegg_output`` is
    recreated with only the lines that have exactly 9 tab-separated fields:
    the first such line starting with ``#`` (the header) and every such line
    that does not start with ``#``.  The temporary file is removed
    afterwards.

    Note: when the check fails, ``kegg_output`` is not recreated.

    Args:
        kegg_output: path of the KOBAS output table to clean.
    """
    kegg_out_dir, kegg_out_name = path.split(kegg_output)
    kegg_tmp_file = path.join(kegg_out_dir, 'tmp.%s' % kegg_out_name)
    system('mv %s %s' % (kegg_output, kegg_tmp_file))
    if self.check_KOBAS_out(kegg_tmp_file):
        # Both handles are context-managed so neither leaks if a read or
        # write fails part-way through.
        with open(kegg_output, 'w') as kegg_out_info, \
                open(kegg_tmp_file, 'r') as kegg_tmp_file_info:
            header_written = False
            for eachline in kegg_tmp_file_info:
                if len(eachline.strip().split('\t')) != 9:
                    continue
                if eachline.startswith("#"):
                    # Only the first 9-column '#' line is kept.
                    if not header_written:
                        kegg_out_info.write(eachline)
                        header_written = True
                else:
                    kegg_out_info.write(eachline)
    python_tools.circ_call_process('rm %s' % (kegg_tmp_file))
def run_KEGG_enrich(self):
    """Run blast-hit extraction and KOBAS enrichment for every diff gene list.

    Every entry of ``self.diff_dir`` is treated as one comparison.  For each
    ``*.diffgenes.txt`` file in it:

    1. the blast hits of the listed ids are extracted from
       ``self.all_blast_out`` into ``self.out_dir/blast_out``;
    2. if that extraction produced a file, the command built by
       ``self.generate_kobas`` is executed;
    3. if the enrichment table exists, it is cleaned with
       ``self.treat_KEGG_table`` and passed to ``txt_to_excel``.

    Returns:
        list: executed command strings in execution order, plus a
        ``"## <file> not exists!"`` marker for every expected file that was
        not produced.
    """
    cmd_list = []
    blast_out_dir = path.join(self.out_dir, 'blast_out')
    python_tools.circ_mkdir_unix(blast_out_dir)
    for each_compare in listdir(self.diff_dir):
        each_compare_diff_dir = path.join(self.diff_dir, each_compare)
        diff_gene_list = glob(
            '{}/*.diffgenes.txt'.format(each_compare_diff_dir))
        each_compare_out_dir = path.join(self.out_dir, each_compare)
        python_tools.circ_mkdir_unix(each_compare_out_dir)
        for each_diff_file in diff_gene_list:
            each_out_prefix = path.basename(each_diff_file).split(
                '.edgeR.DE_results')[0]
            kegg_output = path.join(
                each_compare_out_dir,
                '%s.kegg.enrichment.txt' % (each_out_prefix))
            each_blast_out = path.join(
                blast_out_dir, '%s.blasttab' % (each_out_prefix))
            extract_each_blast_cmd = 'python %s --id %s --table %s --output %s' % (
                EXTRACT_INF_BY_ID, each_diff_file, self.all_blast_out,
                each_blast_out)
            kegg_cmd = self.generate_kobas(each_blast_out, kegg_output)

            python_tools.circ_call_process(extract_each_blast_cmd)
            cmd_list.append(extract_each_blast_cmd)
            if not path.exists(each_blast_out):
                # Extraction produced no file: record it and skip KOBAS.
                cmd_list.append("## {} not exists!".format(each_blast_out))
                continue

            python_tools.circ_call_process(kegg_cmd)
            cmd_list.append(kegg_cmd)
            if not path.exists(kegg_output):
                cmd_list.append("## {} not exists!".format(kegg_output))
                continue

            self.treat_KEGG_table(kegg_output)
            txt_to_excel(kegg_output)
    return cmd_list
def plot_pathview(species, pathview_id, each_pathway_kegg_fc_out, out_dir):
    """Run the pathview R script for one pathway, then delete its input table.

    Args:
        species: species argument passed to the R script.
        pathview_id: pathway id argument passed to the R script.
        each_pathway_kegg_fc_out: input table for the R script; removed
            with ``rm`` after the script has been run.
        out_dir: output directory argument passed to the R script.
    """
    script_args = (pathview_script, species, pathview_id,
                   each_pathway_kegg_fc_out, out_dir)
    python_tools.circ_call_process('Rscript %s %s %s %s %s' % script_args)
    os.system('rm %s' % each_pathway_kegg_fc_out)
def run_plot(self):
    """Build the enrichment bar-plot command for KEGG results.

    The ``ENRICH_BAR`` script is run with the ``Rscript`` found under
    ``R_BIN_3_1`` and receives ``self.compare_name``, ``self.output``, the
    literal ``'KEGG'`` and ``self.out_dir``.

    Returns:
        str: the command line.  It is executed only when ``self.auot_run``
        is true.
    """
    plot_args = (R_BIN_3_1, ENRICH_BAR, self.compare_name, self.output,
                 'KEGG', self.out_dir)
    cmd = '%s/Rscript %s %s %s %s %s' % plot_args
    if self.auot_run:
        python_tools.circ_call_process(cmd)
    return cmd
def run_plot(self):
    """Build the enrichment bar-plot command for GO results.

    The ``ENRICH_BAR`` script is run with the ``Rscript`` found under
    ``R_BIN_3_1`` and receives ``self.compare_name``, ``self.output``, the
    literal ``'GO'`` and the directory part of ``self.output``.

    Returns:
        str: the command line.  It is executed only when ``self.auot_run``
        is true.
    """
    # Plots are written next to the enrichment table itself.
    plot_dir = os.path.dirname(self.output)
    plot_args = (R_BIN_3_1, ENRICH_BAR, self.compare_name, self.output,
                 'GO', plot_dir)
    cmd = '%s/Rscript %s %s %s %s %s' % plot_args
    if self.auot_run:
        python_tools.circ_call_process(cmd)
    return cmd
def run_goseq_enrich(self):
    """Build the ``GOSEQ`` R script command and optionally execute it.

    The script is run with the ``Rscript`` found under ``R_BIN`` and
    receives ``self.target_list``, ``self.target_length``, ``self.go`` and
    ``self.output``, in that order.

    Returns:
        str: the command line.  It is executed only when ``self.auot_run``
        is true.
    """
    script_args = (R_BIN, GOSEQ, self.target_list, self.target_length,
                   self.go, self.output)
    cmd = '%s/Rscript %s %s %s %s %s' % script_args
    if self.auot_run:
        python_tools.circ_call_process(cmd)
    return cmd
def kallisto_index(self):
    """Run ``kallisto index`` on ``self.transcript_fa`` and log the command.

    The index is written to ``self.index``; the executed command is saved
    to ``index.cmd.log`` inside ``self.out_dir``.

    Returns:
        str: the command line that was executed.
    """
    index_args = (self.kallisto, self.index, self.transcript_fa)
    cmd = '%s index -i %s %s' % index_args
    python_tools.circ_call_process(cmd)
    python_tools.write_obj_to_file(
        cmd, os.path.join(self.out_dir, 'index.cmd.log'))
    return cmd
'''
Write one targetfinder.pl command per mature miRNA sequence into a shell
script and launch that script in the background.

Usage: python <this script> miRNA_mature mRNA_sequence output_dir
'''

import sys
import python_tools
from Bio import SeqIO
import os

if not len(sys.argv) == 4:
    # Parenthesized single argument prints the same text on Python 2 and 3.
    print('python ' + sys.argv[0] + ' miRNA_mature mRNA_sequence output_dir')
    sys.exit(0)

miRNA_mature = sys.argv[1]
mRNA_seq = sys.argv[2]
output_dir = sys.argv[3]

output_file = os.path.join(output_dir, 'miRNA_targetfinder_results.txt')
cmd_file = os.path.join(output_dir, 'miRNA_target_cmd.sh')

# The command file is closed (and flushed) before the shell is asked to
# run it, even if parsing the fasta file fails part-way through.
with open(cmd_file, 'w') as cmd_file_info:
    for seq_record in SeqIO.parse(miRNA_mature, "fasta"):
        seq_name = seq_record.id
        seq = str(seq_record.seq)
        cmd_file_info.write(
            'targetfinder.pl -s {seq} -d {mRNA_seq} -p table -q {seq_name}\n'.
            format(seq=seq, mRNA_seq=mRNA_seq, seq_name=seq_name))

# stdout of all targetfinder runs goes to the results file, stderr to
# <cmd_file>.log; the trailing '&' puts the shell job in the background.
cmd = 'nohup sh {cmd_file} 1>{output_file} 2>{cmd_file}.log &'.format(
    cmd_file=cmd_file, output_file=output_file)
python_tools.circ_call_process(cmd)
# Collect the R1/R2 fastq files of every sample.  Each name listed in
# args.seq_dir_name is a directory under args.seq_data_dir; each of its
# sub-directories is looked up in sample_map_dict to get the sample id.
sample_data_dict = {}
seq_dir_name_list = args.seq_dir_name.split(',')
for each_name in seq_dir_name_list:
    each_dir = os.path.join(args.seq_data_dir, each_name)
    for each_file in os.listdir(each_dir):
        each_file_path = os.path.join(each_dir, each_file)
        if not os.path.isdir(each_file_path):
            continue
        wgc_id = each_file
        sample_id = sample_map_dict[wgc_id]
        seq_files = os.listdir(each_file_path)
        if sample_id not in sample_data_dict:
            sample_data_dict[sample_id] = RNAseq_tools.rawdata()
            sample_data_dict[sample_id].name = sample_id
        sample_obj = sample_data_dict[sample_id]
        for each_seq_file in seq_files:
            each_seq_path = os.path.join(each_file_path, each_seq_file)
            # Files that are neither R1 nor R2 fastq.gz are ignored.
            if each_seq_file.endswith('R1.fastq.gz'):
                sample_obj.read1.append(each_seq_path)
            elif each_seq_file.endswith('R2.fastq.gz'):
                sample_obj.read2.append(each_seq_path)

# Write one merge command per sample into a shell script, make the script
# executable and run it.
merge_cmd = os.path.join(args.analysis_data_dir, 'get_analysis_data.sh')
for each in sample_data_dict:
    cmd_line = sample_data_dict[each].merge_rawdata(args.analysis_data_dir)
    python_tools.write_obj_to_file(cmd_line, merge_cmd, True)
os.system('chmod +x %s' % merge_cmd)
python_tools.circ_call_process(merge_cmd)