コード例 #1
0
ファイル: run_kegg.py プロジェクト: zhimenggan/RNAseq-4
 def run_kegg_pathview(self):
     """Run the pathview and pathview-check commands for each edgeR table.

     Every entry of ``self.diff_dir`` is treated as one comparison
     directory.  For each ``*.edgeR.DE_results.txt`` file found in it, a
     pathview command and a follow-up check command are built, the pathway
     output directory is created, and both commands are handed to
     ``python_tools.circ_call_process``.

     Returns:
         list of the command strings, in the order they were issued.
     """
     issued_cmds = []
     log_dir = path.join(self.out_dir, 'kegg_pathway_logs')
     python_tools.circ_mkdir_unix(log_dir)
     for compare_name in listdir(self.diff_dir):
         compare_diff_dir = path.join(self.diff_dir, compare_name)
         compare_out_dir = path.join(self.out_dir, compare_name)
         result_pattern = '{}/*.edgeR.DE_results.txt'.format(compare_diff_dir)
         for diff_table in glob(result_pattern):
             prefix = path.basename(diff_table).split('.edgeR.DE_results')[0]
             # Prefixes that do not contain 'UP' get an '.ALL' suffix.
             if 'UP' not in prefix:
                 prefix = '{}.ALL'.format(prefix)
             kegg_table = path.join(
                 compare_out_dir, '%s.kegg.enrichment.txt' % (prefix))
             pathway_dir = path.join(compare_out_dir, '%s.pathway' % prefix)
             check_log = path.join(log_dir, '%s.log' % (prefix))
             plot_cmd = 'python %s --kegg_table %s --blast_out %s --species %s --diff_out %s --out_dir %s' % (
                 PATHVIEW, kegg_table, self.all_blast_out, self.species,
                 diff_table, pathway_dir)
             check_cmd = 'python %s --kegg_table %s --pathway_dir %s --log_file %s' % (
                 PATHVIEW_CK, kegg_table, pathway_dir, check_log)
             python_tools.circ_mkdir_unix(pathway_dir)
             for each_cmd in (plot_cmd, check_cmd):
                 python_tools.circ_call_process(each_cmd)
             issued_cmds += [plot_cmd, check_cmd]
     return issued_cmds
コード例 #2
0
ファイル: RNAseq_tools.py プロジェクト: zhimenggan/pipe_v1.1
 def KEGG_enrich(self):
     """Build the ``run_kobas.py`` KEGG enrichment command.

     When ``self.database`` is ``'NCBI'`` the command takes
     ``self.diff_list`` as an ``id:ncbigene`` input; otherwise it takes
     ``self.compare_blast_out`` as a ``blastout:tab`` input.  The command is
     passed to ``python_tools.circ_call_process`` only when
     ``self.auot_run`` is truthy.

     Returns:
         The command string.
     """
     if self.database == 'NCBI':
         template = 'run_kobas.py -i {self.diff_list}  -t id:ncbigene -s {self.species} -d K -o {self.output}'
     else:
         template = 'run_kobas.py -i {self.compare_blast_out}  -t blastout:tab -s {self.species} -d K -o {self.output}'
     cmd = template.format(self=self)
     if self.auot_run:
         python_tools.circ_call_process(cmd)
     return cmd
コード例 #3
0
ファイル: RNAseq_tools.py プロジェクト: zhimenggan/pipe_v1.1
 def get_blast_out(self):
     """Build the BLAST command that writes ``self.all_blast_out``.

     Sets ``self.ko_seq`` to ``<KO_PEP_DIR>/<species>.pep.fasta``, then calls
     ``self.check_blast_database()`` and ``self.check_blast_program()``; the
     latter's return value is used as the executable name under
     ``BLAST_BIN``.  The command is passed to
     ``python_tools.circ_call_process`` only when ``self.auot_run`` is truthy.

     Returns:
         The command string.
     """
     bin_dir = BLAST_BIN
     # ko_seq is assigned before the database check, as in the original order.
     self.ko_seq = os.path.join(KO_PEP_DIR, '%s.pep.fasta' % self.species)
     self.check_blast_database()
     program = self.check_blast_program()
     template = '{blast_bin}/{blast_pr}  -query {self.seq} -db {self.ko_seq} -evalue 1e-5 -outfmt 6 -max_target_seqs 1 -num_threads 1 -out {self.all_blast_out}'
     cmd = template.format(blast_bin=bin_dir, blast_pr=program, self=self)
     if self.auot_run:
         python_tools.circ_call_process(cmd)
     return cmd
コード例 #4
0
ファイル: RNAseq_tools.py プロジェクト: zhimenggan/pipe_v1.1
 def kallisto_quant(self):
     """Build and run the ``kallisto quant`` command for ``self.fq_list``.

     With more than one fastq file the files are passed to ``quant`` as-is.
     Otherwise the ``--single`` form is used, with ``self.fq_length`` as
     ``-l``, ``self.sd`` as ``-s`` and ``self.thread`` as ``-t``.
     NOTE(review): the multi-file form does not pass ``-t``; confirm whether
     that is intended.

     The command is handed to ``python_tools.circ_call_process`` and then
     passed, together with ``<out_dir>/quant.cmd.log``, to
     ``python_tools.write_obj_to_file``.

     Returns:
         The command string.
     """
     fq_cmd = ' '.join(self.fq_list)
     if len(self.fq_list) > 1:
         cmd = '%s quant -i %s -o %s %s' % (
             self.kallisto, self.index, self.out_dir, fq_cmd)
     else:
         cmd = '%s quant -i %s -o %s --single -l %s -s %s --plaintext -t %s %s ' % (
             self.kallisto, self.index, self.out_dir, self.fq_length,
             self.sd, self.thread, fq_cmd)
     # Both branches run the command the same way, so do it once here.
     python_tools.circ_call_process(cmd)
     quant_log = os.path.join(self.out_dir, 'quant.cmd.log')
     python_tools.write_obj_to_file(cmd, quant_log)
     return cmd
コード例 #5
0
ファイル: run_kegg.py プロジェクト: zhimenggan/RNAseq-4
 def treat_KEGG_table(self, kegg_output):
     """Rewrite a KOBAS output table in place, keeping only 9-column rows.

     The file is first moved to ``tmp.<name>`` in the same directory.  If
     ``self.check_KOBAS_out`` accepts the moved file, ``kegg_output`` is
     rewritten from it keeping:

     * the first tab-separated 9-field line that starts with ``#``
       (used as the header), and
     * every 9-field line that does not start with ``#``.

     The temporary file is removed afterwards in either case, so when the
     check fails ``kegg_output`` no longer exists after this call.

     Args:
         kegg_output: path of the KOBAS result table to clean.
     """
     kegg_out_dir, kegg_out_name = path.split(kegg_output)
     kegg_tmp_file = path.join(kegg_out_dir, 'tmp.%s' % kegg_out_name)
     system('mv %s %s' % (kegg_output, kegg_tmp_file))
     if self.check_KOBAS_out(kegg_tmp_file):
         # Both handles are context-managed so neither leaks if filtering
         # raises part-way through.
         with open(kegg_output, 'w') as kegg_out_info:
             with open(kegg_tmp_file, 'r') as kegg_tmp_file_info:
                 header_written = False
                 for eachline in kegg_tmp_file_info:
                     if len(eachline.strip().split('\t')) != 9:
                         continue
                     if eachline.startswith("#"):
                         # Only the first '#' line with 9 fields is kept.
                         if not header_written:
                             kegg_out_info.write(eachline)
                             header_written = True
                     else:
                         kegg_out_info.write(eachline)
     python_tools.circ_call_process('rm %s' % (kegg_tmp_file))
コード例 #6
0
ファイル: run_kegg.py プロジェクト: zhimenggan/RNAseq-4
 def run_KEGG_enrich(self):
     """Run blast-subset extraction and KOBAS enrichment per diff-gene list.

     Every entry of ``self.diff_dir`` is treated as one comparison
     directory.  For each ``*.diffgenes.txt`` file in it:

     1. the extraction command is issued to write a per-list blast table
        under ``<out_dir>/blast_out``;
     2. if that table exists, the command from ``self.generate_kobas`` is
        issued;
     3. if the enrichment table exists, it is passed to
        ``self.treat_KEGG_table`` and then to ``txt_to_excel``.

     A ``"## <path> not exists!"`` marker is recorded instead when an
     expected file is missing.  Pathview plotting is not triggered here.

     Returns:
         list of issued command strings and missing-file markers, in order.
     """
     record = []
     blast_dir = path.join(self.out_dir, 'blast_out')
     python_tools.circ_mkdir_unix(blast_dir)
     for compare_name in listdir(self.diff_dir):
         compare_diff_dir = path.join(self.diff_dir, compare_name)
         gene_list_files = glob(
             '{}/*.diffgenes.txt'.format(compare_diff_dir))
         compare_out_dir = path.join(self.out_dir, compare_name)
         python_tools.circ_mkdir_unix(compare_out_dir)
         for gene_list_file in gene_list_files:
             prefix = path.basename(gene_list_file).split(
                 '.edgeR.DE_results')[0]
             kegg_table = path.join(
                 compare_out_dir, '%s.kegg.enrichment.txt' % (prefix))
             blast_subset = path.join(blast_dir, '%s.blasttab' % (prefix))
             extract_cmd = 'python %s --id %s --table %s --output %s' % (
                 EXTRACT_INF_BY_ID, gene_list_file, self.all_blast_out,
                 blast_subset)
             kobas_cmd = self.generate_kobas(blast_subset, kegg_table)

             python_tools.circ_call_process(extract_cmd)
             record.append(extract_cmd)
             if not path.exists(blast_subset):
                 record.append("## {} not exists!".format(blast_subset))
                 continue

             python_tools.circ_call_process(kobas_cmd)
             record.append(kobas_cmd)
             if not path.exists(kegg_table):
                 record.append("## {} not exists!".format(kegg_table))
                 continue

             self.treat_KEGG_table(kegg_table)
             txt_to_excel(kegg_table)
     return record
コード例 #7
0
ファイル: kegg_pathview.py プロジェクト: zhimenggan/RNAseq-4
def plot_pathview(species, pathview_id, each_pathway_kegg_fc_out, out_dir):
    """Issue the pathview Rscript command, then remove its input table.

    Args:
        species: passed as the first argument after the R script path.
        pathview_id: passed as the second argument.
        each_pathway_kegg_fc_out: passed as the third argument; this path is
            removed with ``rm`` after the command has been issued.
        out_dir: passed as the last argument.
    """
    rscript_args = (pathview_script, species, pathview_id,
                    each_pathway_kegg_fc_out, out_dir)
    python_tools.circ_call_process('Rscript %s %s %s %s %s' % rscript_args)
    cleanup_cmd = 'rm %s' % each_pathway_kegg_fc_out
    os.system(cleanup_cmd)
コード例 #8
0
ファイル: RNAseq_tools.py プロジェクト: zhimenggan/pipe_v1.1
 def run_plot(self):
     """Build the Rscript command for the enrichment bar plot (``'KEGG'``).

     The command is passed to ``python_tools.circ_call_process`` only when
     ``self.auot_run`` is truthy.

     Returns:
         The command string.
     """
     plot_args = (R_BIN_3_1, ENRICH_BAR, self.compare_name, self.output,
                  'KEGG', self.out_dir)
     cmd = '%s/Rscript %s %s %s %s %s' % plot_args
     if self.auot_run:
         python_tools.circ_call_process(cmd)
     return cmd
コード例 #9
0
ファイル: RNAseq_tools.py プロジェクト: zhimenggan/pipe_v1.1
 def run_plot(self):
     """Build the Rscript command for the enrichment bar plot (``'GO'``).

     The output directory argument is the directory part of
     ``self.output``.  The command is passed to
     ``python_tools.circ_call_process`` only when ``self.auot_run`` is truthy.

     Returns:
         The command string.
     """
     plot_dir = os.path.split(self.output)[0]
     plot_args = (R_BIN_3_1, ENRICH_BAR, self.compare_name, self.output,
                  'GO', plot_dir)
     cmd = '%s/Rscript %s %s %s %s %s' % plot_args
     if self.auot_run:
         python_tools.circ_call_process(cmd)
     return cmd
コード例 #10
0
ファイル: RNAseq_tools.py プロジェクト: zhimenggan/pipe_v1.1
 def run_goseq_enrich(self):
     """Build the Rscript command that runs the ``GOSEQ`` script.

     The script receives ``self.target_list``, ``self.target_length``,
     ``self.go`` and ``self.output``, in that order.  The command is passed
     to ``python_tools.circ_call_process`` only when ``self.auot_run`` is
     truthy.

     Returns:
         The command string.
     """
     goseq_args = (R_BIN, GOSEQ, self.target_list, self.target_length,
                   self.go, self.output)
     cmd = '%s/Rscript %s %s %s %s %s' % goseq_args
     if self.auot_run:
         python_tools.circ_call_process(cmd)
     return cmd
コード例 #11
0
ファイル: RNAseq_tools.py プロジェクト: zhimenggan/pipe_v1.1
 def kallisto_index(self):
     """Build and issue the ``kallisto index`` command.

     The command uses ``self.index`` as the ``-i`` value and
     ``self.transcript_fa`` as the input.  After it is handed to
     ``python_tools.circ_call_process``, the command string and the path
     ``<out_dir>/index.cmd.log`` are passed to
     ``python_tools.write_obj_to_file``.

     Returns:
         The command string.
     """
     index_args = (self.kallisto, self.index, self.transcript_fa)
     cmd = '%s index -i %s %s' % index_args
     python_tools.circ_call_process(cmd)
     log_path = os.path.join(self.out_dir, 'index.cmd.log')
     python_tools.write_obj_to_file(cmd, log_path)
     return cmd
コード例 #12
0
'''

import sys
import python_tools
from Bio import SeqIO
import os

# Usage: python <script> miRNA_mature mRNA_sequence output_dir
# Writes one targetfinder.pl command per miRNA record into a shell script in
# output_dir, then launches that script in the background via nohup.
if len(sys.argv) != 4:
    # Parenthesised single-argument print works on both Python 2 and 3.
    print('python ' + sys.argv[0] + ' miRNA_mature mRNA_sequence output_dir')
    sys.exit(0)

miRNA_mature = sys.argv[1]
mRNA_seq = sys.argv[2]
output_dir = sys.argv[3]

output_file = os.path.join(output_dir, 'miRNA_targetfinder_results.txt')
cmd_file = os.path.join(output_dir, 'miRNA_target_cmd.sh')

# The context manager closes (and flushes) the command file even if FASTA
# parsing fails part-way through.
with open(cmd_file, 'w') as cmd_file_info:
    for seq_record in SeqIO.parse(miRNA_mature, "fasta"):
        seq_name = seq_record.id
        seq = str(seq_record.seq)
        cmd_file_info.write(
            'targetfinder.pl -s {seq} -d {mRNA_seq} -p table -q {seq_name}\n'.
            format(seq=seq, mRNA_seq=mRNA_seq, seq_name=seq_name))

# stdout of the batch goes to the results file, stderr to <cmd_file>.log.
cmd = 'nohup sh {cmd_file} 1>{output_file} 2>{cmd_file}.log &'.format(
    cmd_file=cmd_file, output_file=output_file)
python_tools.circ_call_process(cmd)
コード例 #13
0
# Collect the R1/R2 fastq files of every sample, then write and launch the
# merge script.
# Layout walked here: <seq_data_dir>/<seq_dir_name>/<wgc_id>/<fastq files>;
# each wgc_id sub-directory is mapped to a sample id via sample_map_dict.
sample_data_dict = {}
seq_dir_name_list = args.seq_dir_name.split(',')
for each_name in seq_dir_name_list:
    each_dir = os.path.join(args.seq_data_dir, each_name)
    each_dir_files = os.listdir(each_dir)
    for each_file in each_dir_files:
        each_file_path = os.path.join(each_dir, each_file)
        # Only sub-directories are considered; plain files are skipped.
        if not os.path.isdir(each_file_path):
            continue
        wgc_id = each_file
        sample_id = sample_map_dict[wgc_id]
        seq_files = os.listdir(each_file_path)
        if sample_id not in sample_data_dict:
            sample_data_dict[sample_id] = RNAseq_tools.rawdata()
            sample_data_dict[sample_id].name = sample_id
        for each_seq_file in seq_files:
            each_seq_path = os.path.join(each_file_path, each_seq_file)
            # Files with any other suffix are ignored.
            if each_seq_file.endswith('R1.fastq.gz'):
                sample_data_dict[sample_id].read1.append(each_seq_path)
            elif each_seq_file.endswith('R2.fastq.gz'):
                sample_data_dict[sample_id].read2.append(each_seq_path)

# One merge_rawdata() result per sample is passed to write_obj_to_file with
# the script path; the script is then made executable and launched.
merge_cmd = os.path.join(args.analysis_data_dir, 'get_analysis_data.sh')
for each in sample_data_dict:
    cmd_line = sample_data_dict[each].merge_rawdata(args.analysis_data_dir)
    python_tools.write_obj_to_file(cmd_line, merge_cmd, True)

os.system('chmod +x %s' % merge_cmd)
python_tools.circ_call_process(merge_cmd)