help="plot sample bar plot, if this params is set, group file will only for order") parser.add_argument('--contains_other', dest="contains_other", action='store_true', help="totel abundance contains other abundance ; totel aundance is 1") parser.add_argument('--top', dest="top",metavar="INT",type=int,default=20, help='set the top num, [default is 20]') parser.set_defaults(with_group=False) args = parser.parse_args() params = vars(args) params['group'] = parse_group_file(params['group']) return params if __name__ == '__main__': params = read_params(sys.argv) outputfile = params['outputfile'] dirname,filename,suffix =get_name(outputfile) inputfile = params['inputfile'] top = params['top'] title = params['title'] data = pd.DataFrame.from_csv(file=inputfile,sep="\t") data["sum"] = data.sum(axis=1) data = data.sort_values(by="sum",ascending=False) del data["sum"] data = data.ix[:top] data.to_csv("%s/for_plot.csv"%dirname,sep="\t") mkdir(os.path.split(outputfile)[0]) RscriptDir = const.Rscript r_job = Rparser() r_job.open("%s/02_barplot.R"%RscriptDir) vars = {"top":top, "infile": "%s/for_plot.csv"%dirname,
def cazy(config, name):
    """Assemble the shell commands for the CAZy analysis step.

    Nothing is executed here: the function creates the output directories the
    commands refer to (through ``mkdir``) and returns the command lines for
    the caller to run.

    Args:
        config: Path to the pipeline configuration file. The step works in
            ``<dirname(config)>/<name>`` and reads the ``group`` option of the
            ``[param]`` section from this file.
        name: Name of this step's directory below the directory of ``config``.

    Returns:
        list: Shell command strings, interleaved with ``#`` comment lines
        that label each section, in the order they were generated.
    """
    # Single-argument print() behaves the same as the Python 2 statement.
    print(gettime("start 12.cazy"))
    prefix = const.command_default
    work_dir = "%s/%s" % (os.path.dirname(config), name)

    commands = [
        "## whole cazy analysis",
        # -f: a plain `rm` exits non-zero when all.m8 is absent (first run).
        "rm -f %s/blat/all.m8" % work_dir,
        "cat %s/blat/* > %s/blat/all.m8" % (work_dir, work_dir),
        prefix + ("python %s/01.get_anno_info.py -i %s/blat/all.m8 -o %s"
                  % (cazy_bin_dir, work_dir, work_dir)),
    ]

    # One profile per annotation level; the number is the value handed to -l.
    for l_value, level in ((5, "class"), (6, "type"), (9, "enzyme")):
        commands.append(prefix + (
            "python %s/02.get_profile_and_count.py -a %s/cazy.anno.tsv "
            "-p %s/../06.gene_profile/gene.profile -l %d -c %s -o %s"
            % (cazy_bin_dir, work_dir, work_dir, l_value, level, work_dir)))

    commands.append("## 1212.function_barplot")
    work_dir_12 = "%s/12.functional_barplot" % work_dir
    mkdir(work_dir_12)
    for level in ("class", "type", "enzyme"):
        commands.append(prefix + (
            "Rscript %s/710_level1_barplot.R %s/%s.profile %s/%s_barplot.pdf %s"
            % (cazy_bin_dir, work_dir, level, work_dir_12, level, level)))
        commands.append(
            "convert -density 300 %s/%s_barplot.pdf %s/%s_barplot.png"
            % (work_dir_12, level, work_dir_12, level))

    config_gene = ConfigParser()
    config_gene.read(config)
    # Drop empty tokens: a value such as "a, b" splits into ['a', '', 'b'],
    # and an empty entry must not reach get_name()/parse_group().
    group = [entry
             for entry in re.split(r"\s+|\t|,", config_gene.get("param", "group"))
             if entry]

    # (sub-directory, label line, script) for the steps that all share the
    # "-i type.profile -g <group> -o <dir>" command line.
    total_sample_steps = (
        ("02.heatmap", "##heatmap", "6_heatmap.py"),
        ("03.pca", "##pca", "1_pca.py"),
        ("04.pcoa", "##pcoa", "2_pcoa.py"),
    )
    per_group_sample_steps = (
        ("05.nmds", "##nmds", "3_nmds.py"),
        ("06.anosim", "##anosim", "4_anosim.py"),
        ("07.mrpp", "##mrpp", "5_mrpp.py"),
    )

    def add_type_profile_steps(steps, sub_work_dir, subgroup):
        """Create each step directory and queue its label and command."""
        for subdir, label, script in steps:
            out_dir = "%s/%s/" % (sub_work_dir, subdir)
            mkdir(out_dir)
            commands.append(label)
            commands.append(prefix + (
                "python %s/%s -i %s/type.profile -g %s -o %s"
                % (tools_dir, script, work_dir, subgroup, out_dir)))

    for subgroup in group:
        _, subgroup_name, _ = get_name(subgroup)
        _, min_sample_num_in_groups, sample_num_total, group_num = \
            parse_group(subgroup)
        sub_work_dir = "%s/group/%s" % (work_dir, subgroup_name)
        commands.append(
            "## ----------------------------------%s----------------------------------##"
            % (subgroup_name))

        if sample_num_total >= 5:
            add_type_profile_steps(total_sample_steps, sub_work_dir, subgroup)

        if min_sample_num_in_groups >= 5:
            add_type_profile_steps(per_group_sample_steps, sub_work_dir, subgroup)

        if 6 <= group_num < 30:
            work_dir_1208 = "%s/08.flower/" % sub_work_dir
            mkdir(work_dir_1208)
            commands.append("##flower")
            commands.append(prefix + (
                "perl %s/7_flower.pl %s/type.profile %s %s"
                % (tools_dir, work_dir, subgroup, work_dir_1208)))
        elif 2 <= group_num < 6:
            work_dir_1208 = "%s/08.venn/" % sub_work_dir
            mkdir(work_dir_1208)
            commands.append("##venn")
            # The script receives the group directory, not 08.venn itself.
            commands.append(prefix + (
                "python %s/7_venn_flower.py -i %s/type.profile -o %s -g %s --with_group"
                % (tools_dir, work_dir, sub_work_dir, subgroup)))

        # Kept as a second check (not merged with the one above) so that the
        # generated commands stay in their original order.
        if min_sample_num_in_groups >= 5:
            work_dir_1209 = "%s/09.ko_wilcoxon/" % sub_work_dir
            mkdir(work_dir_1209)
            commands.append("##diff")
            # NOTE(review): invoked without "python" and with the group
            # directory as -o, while the follow-up steps read from
            # 09.ko_wilcoxon/ -- presumably 8_diff.py is executable and adds
            # that sub-directory itself; confirm.
            commands.append(prefix + (
                "%s/8_diff.py -i %s/type.profile -g %s -o %s"
                % (tools_dir, work_dir, subgroup, sub_work_dir)))
            commands.append("# diff boxplot")
            commands.append(prefix + (
                "python %s/9_diff_boxplot.py -i %s/diff.marker.filter.profile.tsv "
                "-p %s/diff.marker.filter.tsv -g %s -o %s/diff_boxplot/"
                % (tools_dir, work_dir_1209, work_dir_1209, subgroup,
                   work_dir_1209)))
            commands.append("# diff heatmap")
            commands.append(prefix + (
                "python %s/6_heatmap.py -i %s/diff.marker.filter.profile.tsv "
                "-g %s -o %s/heatmap/"
                % (tools_dir, work_dir_1209, subgroup, work_dir_1209)))

            work_dir_1210 = "%s/10.lefse/" % sub_work_dir
            mkdir(work_dir_1210)
            commands.append("## lefse")
            commands.append(prefix + (
                "python %s/603_LEfSe.py -i %s/type.profile "
                "-l /data_center_03/USER/huangy/soft/LEfSe_lzb -g %s -o %s --LDA 2"
                % (cazy_bin_dir, work_dir, subgroup, work_dir_1210)))
            commands.append("#lefse heatmap")
            commands.append(prefix + (
                "python %s/6_heatmap.py -i %s/diff.marker.filter.profile.tsv "
                "-g %s -o %s/heatmap/"
                % (tools_dir, work_dir_1210, subgroup, work_dir_1210)))

        if group_num == 2 and min_sample_num_in_groups >= 5:
            # The 11.metastats step and the per-group 12.functional_barplot
            # step were commented out in the original and stay disabled; only
            # sample clustering is generated under this condition.
            work_dir_1213 = "%s/13.functional_clust/" % sub_work_dir
            mkdir(work_dir_1213)
            commands.append("##712 sample cluster")
            commands.append(prefix + (
                "python %s/10_sample_clustering.py -i %s/type.profile -g %s -o %s "
                "-t \"Type Abundance in Samples\""
                % (tools_dir, work_dir, subgroup, work_dir_1213)))

    print(gettime("end cazy"))
    return commands
args = parser.parse_args() params = vars(args) return params if __name__ == '__main__': reload(sys) sys.setdefaultencoding('utf8') params = read_params(sys.argv) tool_default_dir = const.tool_default_dir mkdir(params['out_dir']) profile_table = params["profile_table"] group_file = params["group_file"] # work_dir = params["work_dir"] catalog_ko = params["catalog_ko"] dirname, subgroup_name, _ = get_name(group_file) group_names = os.popen("cut -f2 %s|sort|uniq" % group_file).read().strip().split("\n") if len(group_names) > 2: raise Exception("group number only 2") diff_ko = {} with open(profile_table, "r") as fqin: for line in fqin: if line.startswith("taxonname"): continue tabs = line.strip().split("\t") if tabs[5] == group_names[0] or tabs[5] == "1": diff_ko[tabs[0]] = 1 if tabs[5] == group_names[1] or tabs[5] == "-1": diff_ko[tabs[0]] = -1
def taxon(config,sh_default_file,outpath,name):
    """Prepare the taxonomy step and return its shell commands.

    Side effects performed immediately: a ``config.yaml`` derived from
    ``const.config_yaml`` is written next to ``config``, the Snakefile and
    ``cluster.yaml`` are copied there with ``cp``, and the output directories
    used by the commands are created through ``mkdir``. The commands
    themselves are only collected, not executed; most of them use paths
    relative to the step directory.

    Args:
        config: Path to the pipeline configuration file; its directory is the
            working directory of this step and its ``[param] group`` option
            is split on whitespace to obtain the groups to analyse.
        sh_default_file: Not used by this function.
        outpath: Not used by this function.
        name: Not used by this function.

    Returns:
        list: Shell command strings, interleaved with ``#`` comment lines, in
        the order they were generated.
    """
    print("start taxon :%s s"%time())
    commands = []
    work_dir = os.path.dirname(config)
    pyscript_dir = const.PYscript

    # Update config.yaml: load the template, override two keys, write a copy
    # into the working directory. `with` closes the handles even on error.
    with open(const.config_yaml) as template:
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary objects and is rejected by PyYAML >= 6; consider
        # yaml.safe_load if the template holds plain data only.
        yl = yaml.load(template)
    yl["work_dir"] = "%s/../" % work_dir
    # NOTE(review): this value differs from the clean_reads_list passed to
    # snakemake via --config below ("02.taxon/clean_reads_list"); confirm
    # which path is intended.
    yl["clean_reads_list"] = "02.tax/clean_read.list"
    with open('%s/config.yaml' % work_dir, "w") as fout:
        yaml.dump(yl, fout, default_flow_style=False)

    os.system("cp %s %s/Snakefile" % (const.snakemake, work_dir))
    os.system("cp %s %s/cluster.yaml" % (const.cluster_yaml, work_dir))
    for log_dir in ("log/", "log/align/", "log/abund/"):
        mkdir("%s/%s" % (work_dir, log_dir))

    commands.append("## calculate abundance")
    commands.append("source activate gutbio")
    commands.append("snakemake --cluster-config cluster.yaml --cluster 'qsub -o {cluster.qsublog} -e {cluster.qsublog} -l vf={cluster.vf}' -j 10 --nolock --config clean_reads_list=\"02.taxon/clean_reads_list\"")
    commands.append("source deactivate")

    commands.append("## form species profile")
    commands.append("ls alignment/*/*species.abundance >list")
    commands.append("python /data_center_01/pipeline/huangy/metagenome/pyscript/02_taxnomy.py -i list")
    commands.append("rm list")
    mkdir("%s/profile/" % work_dir)
    # (rank, suffix): "species" is profiled twice, once from *.abundance and
    # once from *.abundance2 (written to species.profile2).
    profile_inputs = (
        ("species", ""), ("species", "2"), ("genus", ""), ("class", ""),
        ("family", ""), ("order", ""), ("phylum", ""), ("all", ""),
    )
    for rank, suffix in profile_inputs:
        commands.append(
            "ls alignment/*/*%s.abundance%s | /data_center_01/pipeline/huangy/metagenome/perlscript/02_profile - > profile/%s.profile%s"
            % (rank, suffix, rank, suffix))

    # otu_table_L<n>.txt = header line followed by the matching profile.
    otu_tables = (
        (2, "phylum.profile"), (3, "class.profile"), (4, "order.profile"),
        (5, "family.profile"), (6, "genus.profile"), (7, "species.profile2"),
    )
    for level, _ in otu_tables:
        commands.append(
            "echo '# Constructed from biom file' >profile/otu_table_L%d.txt" % level)
    for level, profile in otu_tables:
        commands.append(
            "cat profile/%s >> profile/otu_table_L%d.txt" % (profile, level))

    # The "use rate" commands are emitted as shell comments (disabled).
    commands.append("## use rate")
    commands.append("#mkdir use_rate")
    commands.append("#ls alignment/*/*MATCH |while read a; do echo \"perl /data_center_03/USER/zhongwd/rd/11_taxonomy_V2.0/bin/stat.pl < $a > $a.stat\" ;done > use_rate/stat.sh")
    commands.append("#nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --resource vf=1G --maxjob 10 --jobprefix ST --lines 1 --getmem use_rate/stat.sh &")
    commands.append("#ls alignment/*/*MATCH.stat | perl /data_center_03/USER/zhongwd/rd/11_taxonomy_V2.0/bin/stat_tab.pl - ../00.raw_reads/qc_final.stat.tsv > use_rate/stat.tsv")

    config_gene = ConfigParser()
    config_gene.read(config)
    # Empty tokens (e.g. from an empty option value) are dropped so they are
    # never handed to get_name()/parse_group().
    group = [entry
             for entry in re.split(r"\s+|\t", config_gene.get("param", "group"))
             if entry]
    mkdir("%s/" % work_dir)
    mkdir("%s/group/" % work_dir)

    commands.append("## 00.piechart need finish")
    mkdir("%s/group/00.piechart" % (work_dir))
    for rank in ("species", "genus"):
        commands.append(
            "ls alignment/*/*%s.abundance | sed 's/alignment\/\(.*\)\/.*%s.abundance/\\1/g' | while read a ; do perl /data_center_03/USER/zhongwd/rd/11_taxonomy_V2.0/test/pieplot/pie.pl < alignment/$a/$a.%s.abundance > group/00.piechart/$a.%s.pie.svg;done"
            % (rank, rank, rank, rank))

    commands.append("## 03.accum")
    mkdir("%s/03.accum_share" % (work_dir))
    for rank in ("genus", "species"):
        commands.append("ln -s ../profile/%s.profile 03.accum_share/" % rank)
    for rank in ("genus", "species"):
        commands.append(
            "perl /data_center_03/USER/zhongwd/rd/Finish/07_acumm_share_curve/Accumulated_Shared_Curve.pl -p 03.accum_share/%s.profile -c %s -t 100"
            % (rank, rank))

    # The rarecurve commands are emitted as shell comments (disabled).
    commands.append("## 04.rarecurve")
    mkdir("%s/04.rarecurve" % (work_dir))
    commands.append("#list alignment/*/*MATCH > 04.rarecurve/match.list; sed 's/.*alignment\/\(.*\)\/.*MATCH/\\1/g' 04.rarecurve/match.list | paste - 04.rarecurve/match.list > 04.rarecurve/match.list.tmp; mv -f 04.rarecurve/match.list.tmp 04.rarecurve/match.list")
    commands.append("#nohup perl /data_center_03/USER/zhongwd/rd/05_rarecurve/RareCurve/RareCurve.pl -s clean_reads_list -m 04.rarecurve/match.list -d 04.rarecurve &")

    commands.append("## 06.ternaryplot")
    mkdir("%s/06.ternaryplot" % (work_dir))
    for rank in ("species", "genus"):
        commands.append(
            "Rscript /data_center_01/pipeline/huangy/metagenome/Rscript/02_ternary.R profile/%s.profile sample.list 06.ternaryplot/%s.ternary.pdf %s"
            % (rank, rank, rank))

    commands.append("## 07.treeplot")
    mkdir("%s/07.treeplot" % (work_dir))
    commands.append("cut -f 1 clean_reads_list | while read a; do mkdir 07.treeplot/$a; perl /data_center_03/USER/zhongwd/temp/0106/tree/a.pl < alignment/$a/$a.species.abundance > 07.treeplot/$a/test.info 2> 07.treeplot/$a/test.tax; done")
    commands.append("cut -f 1 clean_reads_list | while read a; do cd 07.treeplot/$a; perl /data_center_03/USER/zhongwd/temp/0106/tree/zwd_newwick.pl < test.tax > test.tre; ~/anaconda_ete/bin/python /data_center_03/USER/zhongwd/temp/0106/tree/plottre.py; cd -; done")

    commands.append("## 08.cluster")
    mkdir("%s/08.cluster" % (work_dir))
    commands.append("Rscript /data_center_03/USER/zhongwd/rd/11_taxonomy_V2.0/test/barplot/bartreeplot.r profile/species.profile sample.list 08.cluster/species.clust.pdf")

    for subgroup in group:
        _, subgroup_name, _ = get_name(subgroup)
        # The counts are not used here; the call is kept because it may
        # validate the group entry -- TODO confirm and drop if it is pure.
        parse_group(subgroup)
        group_dir = "%s/group/%s" % (work_dir, subgroup_name)
        mkdir(group_dir)

        commands.append("## 01.barplot need finish")
        mkdir("%s/01.barplot" % group_dir)
        for extra in ("", "--level 7 "):
            commands.append(
                "/data_center_01/pipeline/16S_ITS_pipeline_v3.0/script/02_bar_plot.py -t %s/profile/ -o %s/01.barplot/ -g %s %s"
                % (work_dir, group_dir, subgroup, extra))

        commands.append("## 02.core")
        mkdir("%s/02.core" % group_dir)
        # Each rank reads its own OTU table: L7 is built from
        # species.profile2 and L6 from genus.profile (see above). The genus
        # run previously read L7 as well, duplicating the species result.
        for rank, otu_table in (("species", "otu_table_L7.txt"),
                                ("genus", "otu_table_L6.txt")):
            commands.append(
                "python %s/02_venn.py -i %s/profile/%s -o %s/02.core/%s/ -g %s "
                % (pyscript_dir, work_dir, otu_table, group_dir, rank, subgroup))

        commands.append("## 05.top_boxplot")
        mkdir("%s/05.top_boxplot" % group_dir)
        commands.append(
            "python %s/02_top.py -i %s/profile/all.profile -g %s -o %s/05.top_boxplot/"
            % (pyscript_dir, work_dir, subgroup, group_dir))

        commands.append("## 09.pca")
        mkdir("%s/09.pca" % group_dir)
        for rank in ("species", "genus"):
            commands.append(
                "python %s/02_otu_pca.py -i %s/profile/%s.profile -g %s -o %s/09.pca --with_boxplot"
                % (pyscript_dir, work_dir, rank, subgroup, group_dir))

        commands.append("## 11.anosim; 13.pcoa; 14.nmds")
        mkdir("%s/11-14.beta_div" % group_dir)
        for rank in ("species", "genus"):
            mkdir("%s/11-14.beta_div/%s" % (group_dir, rank))
        for rank in ("species", "genus"):
            commands.append(
                "cd group/%s/11-14.beta_div/%s; perl /data_center_01/pipeline/huangy/metagenome/perlscript/02_Beta_diversity.pl -p ../../../../profile/%s.profile -g %s -m bray -r; cd -"
                % (subgroup_name, rank, rank, subgroup))

        mkdir("%s/15.LEfSe" % group_dir)
        commands.append(
            "python /data_center_01/pipeline/16S_ITS_pipeline_v3.0/script/05_filter_abundance.py -i %s/profile/species.profile -g %s -o %s/15.LEfSe/ --cut_off 1e-5"
            % (work_dir, subgroup, group_dir))
        commands.append(
            "python %s/02_LEfSe.py -i %s/15.LEfSe/species.profile -l /data_center_03/USER/huangy/soft/LEfSe_lzb -g %s -o %s/15.LEfSe/ --LDA 2"
            % (pyscript_dir, group_dir, subgroup, group_dir))

    print("end taxon :%s s"%time())
    return commands