Exemple #1
0
def gene_predict(config, name):
    """Collect the shell command lines of the ``04.gene_predict`` step.

    The step directory ``<dirname(config)>/<name>`` and its ``histogram``
    sub-directory are requested through the project ``mkdir`` helper. The
    commands are only gathered here; nothing is executed.

    :param config: path of the pipeline configuration file. Its ``[param]``
        section is queried for ``ins_list``.
    :param name: name of the step directory, located next to ``config``.
    :return: list of shell command strings in the order they were added.
    """
    print(gettime("start 04.gene_predict"))
    step_dir = '%s/%s' % (os.path.dirname(config), name)
    parser = ConfigParser()
    parser.read(config)
    # The value is not used below; the lookup itself is kept so a missing
    # option is still reported exactly as before.
    ins_list = parser.get("param", "ins_list")
    mkdir(step_dir)

    # ORF statistics, compressed GFF files and translated/compressed genes.
    commands = [
        "ls gene/*fna | perl %s/stat.pl > orf.stat.tsv"
        % bin_gene_predict_default_dir,
        "ls gff/*gff | sed 's/.gff//g' | while read a ; do gzip -c $a.gff > $a.gff.gz;done",
        "ls gene/*fna | sed 's/.fna//g' | while read a ; do perl %s/cds2pep.pl $a.fna $a.faa; gzip -c $a.fna > $a.fna.gz; gzip -c $a.faa > $a.faa.gz; done"
        % tool_default_dir,
        "## histogram",
    ]

    mkdir("%s/histogram/" % step_dir)
    # Gene length table -> PDF histogram -> PNG rendering, per sample.
    commands += [
        "cut -f 1 gene.list | while read a; do /data_center_03/USER/zhongwd/bin/lengthfasta gene/$a.gene.fna > histogram/$a.gene.length; done",
        "cut -f 1 gene.list | while read a; do Rscript %s/gene.histogram.R histogram/$a.gene.length histogram/$a.gene.histogram.pdf; done"
        % bin_gene_predict_default_dir,
        "cut -f 1 gene.list | while read a; do convert -density 300 histogram/$a.gene.histogram.pdf histogram/$a.gene.histogram.png; done",
    ]
    print(gettime("end 04.gene_predict"))
    return commands
def mgs(config, name):
    """Prepare the ``10.mgs`` step and collect its shell command lines.

    For every group name listed under ``[param] group`` in ``config`` a
    sub-directory is created below ``<dirname(config)>/<name>``. When
    ``parse_group`` reports at least 5 samples in the smallest group, at
    least 20 samples in total and exactly 2 groups, the group list is copied
    into that sub-directory and the MGS commands are appended. Otherwise a
    ``Sample_not_enough.log`` file stating the requirement is written.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: list of shell command strings (possibly empty).
    """
    print(gettime("stat 10.mgs"))
    commands = []
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s' % (main_dir, name)
    material_dir = '%s/material' % main_dir
    if not os.path.isdir(work_dir):
        mkdir(work_dir)

    config_group = ConfigParser()
    config_group.read(config)
    # Split on any run of whitespace and/or commas and drop empty fields.
    # The previous pattern ended with an empty alternative, which makes
    # re.split cut the value at every character on Python 3.7+.
    raw_groups = config_group.get('param', 'group')
    group = [entry for entry in re.split(r'[\s,]+', raw_groups) if entry]

    for subgroup_name in group:
        subgroup_dir = "%s/%s" % (work_dir, subgroup_name)
        mkdir(subgroup_dir)

        sample_num_in_groups, min_sample_num_in_groups, sample_num_total, group_num = parse_group(
            "%s/%s_group.list" % (material_dir, subgroup_name))
        if min_sample_num_in_groups >= 5 and sample_num_total >= 20 and group_num == 2:
            os.system("cp %s/%s_group.list %s/group.list" % (material_dir, subgroup_name, subgroup_dir))
            # NOTE(review): each command below loops over every directory
            # holding a group.list, yet the set is appended once per
            # qualifying subgroup -- confirm the repetition is intended.
            commands.append("## mgs start")
            commands.append('ls | while read a; do if [ -f "$a/group.list" ];then python %s/full_MGS_llf.py -p ../../06.gene_profile/gene.profile -g $a/group.list -d $a/; fi; done' % (bin_mgs_default_dir))
            commands.append('ls | while read a; do if [ -f "$a/group.list" ];then cd $a;sh work.sh;cd -; fi; done')
            commands.append('ls | while read a; do if [ -f "$a/group.list" ];then python %s/mgs_taxonomy.py -i $a/pathway/ -g ../05.gene_catalog/gene_catalog.fna -o $a/taxonomy/ --group $a/group.list; fi; done' % (bin_mgs_default_dir))
            commands.append('ls | while read a; do if [ -f "$a/group.list" ];then cd $a/taxonomy/;sh mgs_taxonomy.sh;cd -; fi; done')
        else:
            # The original referenced ``log.close`` without calling it, so
            # the file was never explicitly closed; ``with`` closes it.
            with open("%s/Sample_not_enough.log" % subgroup_dir, "w+") as log:
                log.write("min_sample_num_in_groups >= 5 and sample_num_total >= 20 and group_num == 2")
    return commands
Exemple #3
0
def use_old_version(config, name):
    """Collect the commands of the "old version" taxon-profile preprocessing.

    The directory
    ``<dirname(config)>/<name>/preprocess_for_taxon_profile/old_version``
    and its ``profile`` sub-directory are passed to the project ``mkdir``
    helper. Commands are gathered only, not executed.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: tuple ``(work_dir, commands)``.
    """
    print(gettime('start create old version step script'))
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s/preprocess_for_taxon_profile/old_version' % (main_dir, name)
    mkdir(work_dir, '%s/profile' % work_dir)

    qsge = 'nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 15G --jobs 10'
    commands = [
        '## calculate abundance',
        'cp %s/01.clean_reads/clean_reads.list ./' % main_dir,
        '%s/speciesabundance.pl %s/01.clean_reads/clean_reads.list .' % (bin_dir, main_dir),
        '%s --prefix MA --lines 1 shell/match.sh &' % qsge,
        '%s --prefix AB --lines 2 shell/abun.sh &' % qsge,
        '## form species profile',
        'ls alignment/*/*root.abundance >abund.list',
        'python %s/02_taxonomy.py -i abund.list' % bin_dir,
        'rm abund.list',
        'for i in all phylum class order family genus species; do ls alignment/*/*$i.abundance |perl %s/201_profile - >profile/$i.profile; done' % bin_dir,
        'num=1;for i in phylum class order family genus species; do let num=num+1; python %s/201_profile_convert.py -i profile/$i.profile -o profile/otu_table_L$num.txt; done' % bin_dir,
        'ls profile/* | while read a; do cp $a ../../taxon_profile; done',
        # The read-use-rate commands are emitted commented out ('#').
        '## reads use rate',
        '#ls alignment/*/*.MATCH.logs >match_logs.list',
        '#python %s/201_use_rate.py -i match_logs.list -o use_rate.stat.tsv -clean %s/00.raw_reads/qc_stat.tsv' % (bin_dir, main_dir),
        '#rm match_logs.list',
    ]
    return work_dir, commands
def gene_profile(config,sh_default_file,outpath,name):
    """Collect the gene-profile commands plus per-group comparison commands.

    The directory of ``config`` is used as the working directory. For every
    entry of ``[param] group`` a ``group/<stem>/`` tree is requested through
    the project ``mkdir`` helper and the conversion, PCA, beta-diversity and
    differential-analysis commands are appended.

    :param config: path of the pipeline configuration file.
    :param sh_default_file: not used by this function.
    :param outpath: not used by this function.
    :param name: not used by this function.
    :return: list of shell command strings.
    """
    base_dir = os.path.dirname(config)
    commands = [
        "/data_center_01/pipeline/huangy/metagenome/perlscript/06_geneabundance clean_reads_list gene_catalog.list gene_catalog.length",
        "nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --resource vf=10G --maxjob 10 --jobprefix MA --lines 1 --getmem shell_alignment/match.sh &",
        "nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --resource vf=10G --maxjob 10 --jobprefix AB --lines 2 --getmem shell_alignment/abun.sh &",
        "ls alignment/*/*abundance |/data_center_01/pipeline/huangy/metagenome/perlscript/02_profile - > gene.profile",
        "/data_center_01/pipeline/huangy/metagenome/perlscript/06_shannon gene.profile gene.alpha.div.tsv",
        "head -4 gene.profile | sed '1s/^/Gene ID/g' > example.gene.profile.tsv",
        "Rscript /data_center_01/pipeline/huangy/metagenome/Rscript/06_geneset.R",
        # Section marker emitted into the script ("differential analysis").
        "#差异分析",
    ]

    parser = ConfigParser()
    parser.read(config)
    group_entries = re.split("\s+|\t", parser.get("param", "group"))
    mkdir("%s/group/" % base_dir)

    for entry in group_entries:
        group_file = os.path.basename(entry)
        # Group file name without its extension; names the output folder.
        group_tag = os.path.splitext(group_file)[0]
        # Argument order shared by the differential-analysis commands.
        diff_args = (group_tag, group_tag, group_file)

        mkdir("%s/group/%s/" % (base_dir, group_tag))
        commands.append("python /data_center_01/pipeline/huangy/metagenome/pyscript/convert_abundance_group.py gene.profile ../group/%s group/%s/gene.profile genus" % (group_file, group_tag))
        commands.append("/data_center_01/pipeline/16S_ITS_pipeline_v3.0/script/03_otu_pca.py -i group/%s/gene.profile -g ../group/%s -o group/%s/09.pca --with_boxplot" % (group_tag, group_file, group_tag))

        mkdir("%s/group/%s/11-14.beta_div/" % (base_dir, group_tag))
        mkdir("%s/group/%s/11-14.beta_div/gene/" % (base_dir, group_tag))
        commands.append("cd group/%s/11-14.beta_div/gene; perl /data_center_01/pipeline/huangy/metagenome/perlscript/02_Beta_diversity.pl -p ../../../../group/%s/gene.profile -g ../../../../../group/%s -m bray -r; cd -" % diff_args)

        mkdir("%s/group/%s/15.LEfSe/" % (base_dir, group_tag))
        commands.append("/data_center_01/pipeline/16S_ITS_pipeline_v3.0/script/05_tax_diff.py -i group/%s/gene.profile -o group/%s/gene_diff/ -g ../group/%s -c 0.05" % diff_args)
        commands.append("/data_center_01/pipeline/16S_ITS_pipeline_v3.0/script/05_diff_pca.py -i group/%s/gene_diff/profile.for_plot.txt -o group/%s/gene_diff/pca -g ../group/%s" % diff_args)
        commands.append("/data_center_01/pipeline/16S_ITS_pipeline_v3.0/script/03_tax_heatmap.py -f group/%s/gene_diff/profile.for_plot.txt -o group/%s/gene_diff/heatmap -g ../group/%s -t 30" % diff_args)
        commands.append(" /data_center_01/pipeline/16S_ITS_pipeline_v3.0/script/05_diff_boxplot.py -i group/%s/gene_diff/profile.for_plot.txt -o group/%s/gene_diff/boxplot -g ../group/%s -t 20" % diff_args)
    return commands
Exemple #5
0
def gene_profile_pre(config, name):
    """Collect the preparation commands of the ``06.gene_profile`` step.

    ``<dirname(config)>/<name>`` and its ``database`` sub-directory are
    requested through the project ``mkdir`` helper. Commands are gathered
    only, not executed.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: list of shell command strings.
    """
    print(gettime("start 06.gene_profile_pre"))
    step_dir = '%s/%s' % (os.path.dirname(config), name)
    mkdir(step_dir)
    commands = [
        "cp %s/../01.clean_reads/clean_reads.list %s/clean_reads.list"
        % (step_dir, step_dir),
        "## build index",
    ]

    mkdir("%s/database/" % step_dir)
    qsge = "/data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 10G"
    commands += [
        # The two manual index commands are emitted commented out ('#').
        "#ln -s %s/../05.gene_catalog/gene_catalog.fna %s/database/"
        % (step_dir, step_dir),
        "#2bwt-builder %s/database/gene_catalog.fna" % step_dir,
        "python %s/genebuild.py -d %s"
        % (bin_gene_profile_default_dir, step_dir),
        "%s --jobs 1 --prefix BI --lines 1 shell/2bwt_builder.sh" % qsge,
        "## calculate gene abundance",
        "perl %s/geneabundance.pl %s/clean_reads.list database/gene_catalog.fna %s/../05.gene_catalog/gene_catalog.length %s/"
        % (bin_gene_profile_default_dir, step_dir, step_dir, step_dir),
        "%s --jobs 50 --prefix MA --lines 1 shell/match.sh" % qsge,
        "%s --jobs 10 --prefix AB --lines 2 shell/abun.sh" % qsge,
    ]
    print(gettime("end 06.gene_profile_pre"))
    return commands
def use_other_method(config, name):
    """Placeholder for an additional assembly method.

    Requests ``<dirname(config)>/<name>/preprocess_for_assembly/other``
    through the project ``mkdir`` helper, prints a notice that the method is
    unfinished and yields no commands.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: tuple ``(work_dir, commands)`` where ``commands`` is empty.
    """
    print(gettime('start create other step script'))
    work_dir = '%s/%s/preprocess_for_assembly/other' % (
        os.path.dirname(config), name)
    mkdir(work_dir)
    print('This method is not complete,please select other method!')
    return work_dir, []
Exemple #7
0
def use_kraken2_method(config, name):
    """Placeholder for the kraken2 taxon-profile method.

    Requests
    ``<dirname(config)>/<name>/preprocess_for_taxon_profile/kraken2``
    through the project ``mkdir`` helper, prints a notice that the method is
    unfinished and yields no commands.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: tuple ``(work_dir, commands)`` where ``commands`` is empty.
    """
    print(gettime('start create kraken2 step script'))
    commands = []
    work_dir = '%s/%s/preprocess_for_taxon_profile/kraken2' % (
        os.path.dirname(config), name)
    mkdir(work_dir)
    print('This method is not complete,please select other method!')
    return work_dir, commands
def prepare():      # preliminary setup
    """Read the command-line parameters and derive the working directories.

    ``read_params(sys.argv)`` supplies the parameter mapping; its ``config``
    entry names the configuration file. ``wdir`` is set from
    ``[param] work_dir`` and ``rdir`` to ``<wdir>/<raw_dir_name>``. A
    ``temp`` directory below ``rdir`` is requested through the project
    ``mkdir`` helper.

    :return: the parameter mapping extended with ``wdir`` and ``rdir``.
    """
    options = read_params(sys.argv)
    parser = ConfigParser()
    parser.read(options['config'])
    options['wdir'] = parser.get('param', 'work_dir')
    options['rdir'] = '%s/%s' % (options['wdir'],
                                 parser.get('param', 'raw_dir_name'))
    mkdir('%s/temp' % options['rdir'])
    return options
Exemple #9
0
def write_(out_d, sub_group_list):
    """Write one ``sub_group.list`` file per sub-group mapping.

    Each mapping gets a directory below ``out_d`` named after its keys
    joined with ``VS`` (requested through the project ``mkdir`` helper).
    The file holds one ``<entry>\\t<key>`` line for every entry of every key.

    :param out_d: parent output directory.
    :param sub_group_list: iterable of mappings from a group key to an
        iterable of entries (presumably sample names -- confirm with caller).
    """
    for comparison in sub_group_list:
        target_dir = "%s/%s" % (out_d, "VS".join(comparison.keys()))
        mkdir(target_dir)
        with open("%s/sub_group.list" % target_dir, 'w+') as handle:
            handle.writelines(
                "%s\t%s\n" % (entry, label)
                for label in comparison
                for entry in comparison[label])
def kegg_pre(config, name):
    """Collect the blat-mapping commands of the ``07.kegg_pre`` step.

    ``<dirname(config)>/<name>`` is requested through the project ``mkdir``
    helper. Commands are gathered only, not executed.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: list of shell command strings.
    """
    print(gettime("start 07.kegg_pre"))
    step_dir = '%s/%s' % (os.path.dirname(config), name)
    mkdir(step_dir)
    commands = [
        "## blat mapping",
        "perl %s/blatprot.pl /data_center_01/home/NEOLINE/zwd/project/PMO/LiuLin-ascites-stool/07.kegg/db.list %s/../05.gene_catalog/gene_catalog.split.list %s/"
        % (tool_default_dir, step_dir, step_dir),
        "nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 6G --jobs 10 --prefix KEGG --lines 1 shell/blat.sh &",
    ]
    print(gettime("end 07.kegg_pre"))
    return commands
Exemple #11
0
def prepare(dir, file, host):
    """Resolve the directories and sample names used by a later step.

    A ``shell`` directory below ``dir`` is requested through the project
    ``mkdir`` helper. Sample names are the second whitespace-separated field
    of each record returned by ``read_file(file)`` when ``file`` exists.
    Otherwise they are derived from the ``*.1.fq`` files found below the
    ``03.qc`` directory by a shell pipeline.

    :param dir: step directory (absolute or relative).
    :param file: path of a sample list; may point to a missing file.
    :param host: ``'T'`` or ``'True'`` enables the host flag.
    :return: tuple ``(cdir, rdir3, rdir5, sample, host)``. ``sample`` is a
        list when read from ``file`` and a set otherwise; ``host`` is
        ``'yes'`` or ``''``.
    """
    cdir = os.path.abspath(dir)
    mkdir('%s/shell' % dir)
    raw_root = '%s/../00.raw_reads' % cdir
    rdir3 = '%s/03.qc' % raw_root
    rdir5 = '%s/05.clean_reads' % raw_root

    if not os.path.exists(file):
        # No sample list: take the names from the fastq files on disk.
        listing = os.popen('ls %s/*/*.1.fq | while read a; do b=${a##*/}; echo ${b%%.*}; done' % rdir3).read()
        sample = set(listing.strip().split('\n'))
    else:
        sample = []
        for data in read_file(file):
            sample.append(data.split()[1])

    if host in ('T', 'True'):
        host = 'yes'
    else:
        host = ''
    return cdir, rdir3, rdir5, sample, host
Exemple #12
0
def ardb_pre(config, name):
    """Collect the blat-mapping commands of the ``09.ardb_pre`` step.

    ``<dirname(config)>/<name>`` is requested through the project ``mkdir``
    helper. Commands are gathered only, not executed.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: list of shell command strings.
    """
    print(gettime("start 09.ardb_pre"))
    step_dir = '%s/%s' % (os.path.dirname(config), name)
    mkdir(step_dir)
    commands = [
        "## blat mapping",
        "cp /data_center_03/Project/AS/16_ARDB/db.list ./",
        "perl %s/blatprot.pl db.list %s/../05.gene_catalog/gene_catalog.split.list %s/"
        % (tool_default_dir, step_dir, step_dir),
        "nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 5G --jobs 10 --prefix AR --lines 1 --getmem shell/blat.sh &",
    ]
    print(gettime("end 09.ardb_pre"))
    return commands
def clean_reads(config, name):
    """Collect the commands of the clean-reads step.

    ``<dirname(config)>/<name>`` is requested through the project ``mkdir``
    helper. The commands merge the samples listed in
    ``material/sample.list`` and extract the first seven columns of the
    ``00.raw_reads/qc_*.stat.tsv`` tables into ``qc_stat.tsv``. Nothing is
    executed here.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: list of shell command strings.
    """
    # The progress messages used to say "raw_reads" (copied from the
    # raw_reads step); they now name the step that is actually running.
    print(gettime("start clean_reads"))
    commands = []
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s' % (main_dir, name)
    mkdir(work_dir)
    # NOTE(review): this step reads ``bin_defdir`` while raw_reads reads
    # ``bin_default_dir`` -- confirm both globals exist.
    commands.append('nohup python %s/merge.py -l %s/material/sample.list -c %s/ &' %
                    (bin_defdir, main_dir, work_dir))
    commands.append('awk -F "\\t" \'{print $1"\\t"$2"\\t"$3"\\t"$4"\\t"$5"\\t"$6"\\t"$7}\' %s/00.raw_reads/qc_*.stat.tsv > %s/qc_stat.tsv' %
                    (main_dir, work_dir))
    print(gettime("end clean_reads"))
    return commands
def eggnog_pre(config, name):
    """Collect the blat-mapping commands of the ``08.eggnog_pre`` step.

    ``<dirname(config)>/<name>`` is requested through the project ``mkdir``
    helper. Commands are gathered only, not executed.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: list of shell command strings.
    """
    # The opening message used to read "end 08.eggnog_pre", so the log
    # showed two "end" lines and no "start" line for this step.
    print(gettime("start 08.eggnog_pre"))
    commands = []
    work_dir = '%s/%s' % (os.path.dirname(config), name)
    mkdir(work_dir)
    commands.append("## blat mapping")
    commands.append(
        "perl %s/blatprot.pl /data_center_06/Project/pracrice/yehaocheng_20160120/08.eggnog/db.list %s/../05.gene_catalog/gene_catalog.split.list %s"
        % (tool_default_dir, work_dir, work_dir))
    commands.append(
        "nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 6G --jobs 10 --prefix NOG --lines 1 %s/shell/blat.sh &"
        % work_dir)
    print(gettime("end 08.eggnog_pre"))
    return commands
Exemple #15
0
def gene_catalog_pre(config, name):
    """Collect the commands that build the ``05.gene_catalog`` gene set.

    ``<dirname(config)>/<name>`` is requested through the project ``mkdir``
    helper. The commands concatenate the predicted genes and run the
    ``cd-hit.pl`` wrapper on the result. Nothing is executed here.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: list of shell command strings.
    """
    print(gettime("start 05.gene_catalog_pre"))
    step_dir = '%s/%s' % (os.path.dirname(config), name)
    mkdir(step_dir)
    commands = [
        "## build gene catalog",
        "cat %s/../04.gene_predict/gene/*.fna > %s/redundant.gene_catalog.fna"
        % (step_dir, step_dir),
        "perl %s/cd-hit.pl %s/redundant.gene_catalog.fna %s/gene_catalog.fna 20"
        % (bin_gene_catalog_default_dir, step_dir, step_dir),
    ]
    print(gettime("end 05.gene_catalog_pre"))
    return commands
def cazy_pre(config, name):
    """Collect the blat-mapping commands of the ``12.cazy_pre`` step.

    ``<dirname(config)>/<name>`` is requested through the project ``mkdir``
    helper. Commands are gathered only, not executed.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: list of shell command strings.
    """
    print(gettime("start 12.cazy_pre"))
    step_dir = '%s/%s' % (os.path.dirname(config), name)
    mkdir(step_dir)
    # NOTE(review): sibling *_pre steps read ``tool_default_dir``; this one
    # reads ``tools_dir`` -- confirm that global exists.
    commands = [
        "## blat mapping",
        "perl %s/blatprot.pl /data_center_09/Project/lixr/00.DATA/CAZY_DB/db.list %s/../05.gene_catalog/gene_catalog.split.list %s"
        % (tools_dir, step_dir, step_dir),
        "nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 6G --jobs 10 --prefix CAZY --lines 1 shell/blat.sh &",
    ]
    print(gettime("end 12.cazy_pre"))
    return commands
Exemple #17
0
def taxon_pre(config, name):
    """Yield ``(work_dir, commands)`` for each enabled taxon-profile method.

    This is a generator, so nothing happens until it is iterated. On first
    iteration ``<dirname(config)>/<name>/taxon_profile`` is requested
    through the project ``mkdir`` helper. Only the first two known methods
    (``snakemake`` and ``old_version``) are processed.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: generator of ``(work_dir, commands)`` tuples.
    """
    mkdir('%s/%s/taxon_profile' % (os.path.dirname(config), name))
    known_methods = ["snakemake", "old_version", "metaphlan2", "kraken2"]
    enabled_count = 2
    for method in known_methods[:enabled_count]:
        # The builder is looked up inside its branch, so builders of the
        # disabled methods are never referenced.
        if method == "snakemake":
            result = use_snakemake_method(config, name)
        elif method == "old_version":
            result = use_old_version(config, name)
        elif method == "metaphlan2":
            result = use_metaphlan2_method(config, name)
        else:
            result = use_kraken2_method(config, name)
        step_dir, step_commands = result
        yield step_dir, step_commands
Exemple #18
0
def gene_predict_pre(config, name):
    """Collect the preparation commands of the ``04.gene_predict`` step.

    ``<dirname(config)>/<name>`` is requested through the project ``mkdir``
    helper. Commands are gathered only, not executed.

    :param config: path of the pipeline configuration file. Its ``[param]``
        section is queried for ``ins_list``.
    :param name: name of the step directory, located next to ``config``.
    :return: list of shell command strings.
    """
    print(gettime("start 04.gene_predict_pre"))
    step_dir = '%s/%s' % (os.path.dirname(config), name)
    parser = ConfigParser()
    parser.read(config)
    # The value is not used below; the lookup itself is kept so a missing
    # option is still reported exactly as before.
    ins_list = parser.get("param", "ins_list")
    mkdir(step_dir)
    commands = [
        "## gene_predict",
        "perl %s/GenePredict.pl -s %s/../03.assembly/scaftigs.list -l 100 -d %s"
        % (bin_gene_predict_default_dir, step_dir, step_dir),
        "nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 1G --jobs 10 --prefix GP --lines 2 %s/shell/predict.sh &"
        % step_dir,
    ]
    print(gettime("end 04.gene_predict_pre"))
    return commands
Exemple #19
0
def cag(config, name):
    """Prepare the ``11.cag`` step: one ``cag_pre.sh`` script per group.

    For every group name listed under ``[param] group`` in ``config`` a
    sub-directory is created below ``<dirname(config)>/<name>``. When
    ``parse_group`` reports fewer than 20 samples in total, a
    ``Sample_not_enough.log`` file is written. Otherwise the group list is
    copied into the sub-directory and a ``cag_pre.sh`` script holding the
    CAG commands is written there.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: ``None``; the results are the files written on disk.
    """
    print(gettime("stat 11.cag"))
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s' % (main_dir, name)
    material_dir = '%s/material' % main_dir
    if not os.path.isdir(work_dir):
        mkdir(work_dir)

    config_group = ConfigParser()
    config_group.read(config)
    # Split on any run of whitespace and/or commas and drop empty fields.
    # The previous pattern ended with an empty alternative, which makes
    # re.split cut the value at every character on Python 3.7+.
    raw_groups = config_group.get('param', 'group')
    group = [entry for entry in re.split(r'[\s,]+', raw_groups) if entry]

    for subgroup_name in group:
        subgroup_dir = "%s/%s" % (work_dir, subgroup_name)
        mkdir(subgroup_dir)

        sample_num_in_groups, min_sample_num_in_groups, sample_num_total, group_num = parse_group(
            "%s/%s_group.list" % (material_dir, subgroup_name))
        if sample_num_total < 20:
            # The original referenced ``log.close`` without calling it, so
            # the file was never explicitly closed; ``with`` closes it.
            with open("%s/Sample_not_enough.log" % subgroup_dir, "w+") as log:
                log.write("The minimum sample size (20) is not met.")
            continue

        os.system("cp %s/%s_group.list %s/group.list" %
                  (material_dir, subgroup_name, subgroup_dir))
        grp_sh = [
            "python %s/full_CAG.py -p %s/../06.gene_profile/gene.profile -d %s -g %s/group.list"
            % (bin_cag_default_dir, work_dir, subgroup_dir, subgroup_dir),
            "python %s/cag_taxonomy.py -i %s/outfile/cag -g %s/../05.gene_catalog/gene_catalog.fna -o %s/taxonomy/"
            % (bin_cag_default_dir, subgroup_dir, work_dir, subgroup_dir),
            "python %s/cag_exe_sequence.py -d %s" %
            (bin_cag_default_dir, subgroup_dir),
            # A lone newline entry keeps the script's trailing blank lines.
            "\n",
        ]
        with open('%s/cag_pre.sh' % subgroup_dir, 'w') as outf:
            outf.write('\n'.join(grp_sh))
    print(gettime("end 11.cag"))
def raw_reads(config, name):
    """Collect the quality-control command of the raw-reads step.

    ``<dirname(config)>/<name>`` is requested through the project ``mkdir``
    helper. The single command runs ``QC_main.py`` with the batch list, the
    config list and the pipeline configuration. Nothing is executed here.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: list holding one shell command string.
    """
    print(gettime("start raw_reads"))
    main_dir = os.path.dirname(config)
    mkdir('%s/%s' % (main_dir, name))
    qc_command = (
        'python %s/QC_main.py -b %s/material/batch.list -c %s/material/config.list -p %s'
        % (bin_default_dir, main_dir, main_dir, config))
    commands = [qc_command]
    # The Q20/Q30 statistics commands are disabled and are not emitted.
    print(gettime("end raw_reads"))
    return commands
Exemple #21
0
def use_snakemake_method(config, name):
    """Set up the snakemake taxon-profile method and collect its commands.

    Inside
    ``<dirname(config)>/<name>/preprocess_for_taxon_profile/snakemake_method``
    this writes a ``config.yaml`` and a ``cluster.yaml`` derived from the
    templates ``const.config_yaml`` / ``const.cluster_yaml``, copies
    ``const.snakemake`` to ``Snakefile`` and requests the log and profile
    directories through the project ``mkdir`` helper. The returned commands
    are not executed here.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: tuple ``(work_dir, commands)``.
    """
    print(gettime('start create snakemake step script'))
    commands = []
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s/preprocess_for_taxon_profile/snakemake_method' % (main_dir, name)
    mkdir(work_dir)

    # Update config.yaml. ``yaml.safe_load`` replaces the Loader-less
    # ``yaml.load``, which is deprecated and rejected by PyYAML 6.
    # NOTE(review): assumes the templates hold plain YAML data only.
    with open(const.config_yaml, 'r') as inf:
        pipeline_cfg = yaml.safe_load(inf)
    pipeline_cfg['clean_reads_dir'] = '%s/01.clean_reads' % main_dir
    pipeline_cfg['clean_reads_list'] = '%s/clean_reads.list' % work_dir
    pipeline_cfg['outdir'] = '%s/alignment' % work_dir
    with open('%s/config.yaml' % work_dir, 'w') as outf:
        yaml.dump(pipeline_cfg, outf, default_flow_style=False)

    # Update cluster.yaml: point every rule's qsub log at this work dir.
    with open(const.cluster_yaml, 'r') as inf:
        cluster_cfg = yaml.safe_load(inf)
    cluster_cfg['__default__']['qsublog'] = '%s/log/' % work_dir
    cluster_cfg['align']['qsublog'] = '%s/log/align/' % work_dir
    cluster_cfg['abund']['qsublog'] = '%s/log/abund/' % work_dir
    cluster_cfg['abund_profile']['qsublog'] = '%s/log/' % work_dir
    with open('%s/cluster.yaml' % work_dir, 'w') as outf:
        yaml.dump(cluster_cfg, outf, default_flow_style=False)

    # Prepare the Snakefile and the directories the run writes to.
    os.system('cp %s %s/Snakefile' % (const.snakemake, work_dir))
    mkdir('%s/log/align/' % work_dir, '%s/log/abund/' % work_dir, '%s/profile' % work_dir)

    commands.append('cp %s/01.clean_reads/clean_reads.list .' % main_dir)
    commands.append('## calculate abundance')
    commands.append('source activate /data_center_03/USER/huangy/soft/MAIN/anaconda2/envs/gutbio')
    commands.append('snakemake --cluster-config cluster.yaml --cluster \'qsub -o {cluster.qsublog} -e {cluster.qsublog} -l vf={cluster.vf} -q {cluster.queue}\' -j 40 --nolock')
    commands.append('source deactivate')
    commands.append('## form species profile')
    commands.append('ls alignment/*/*root.abundance >abund.list')
    commands.append('python %s/02_taxonomy.py -i abund.list' % bin_dir)
    commands.append('rm abund.list')
    commands.append('for i in all phylum class order family genus species; do ls alignment/*/*$i.abundance |perl %s/201_profile - >profile/$i.profile; done' % bin_dir)
    commands.append('num=1;for i in phylum class order family genus species; do let num=num+1; python %s/201_profile_convert.py -i profile/$i.profile -o profile/otu_table_L$num.txt; done' % bin_dir)
    commands.append('ls profile/* | while read a; do cp $a ../../taxon_profile; done')
    # The read-use-rate commands are emitted commented out ('#').
    commands.append('## reads use rate')
    commands.append('#ls alignment/*/*.MATCH.logs >match_logs.list')
    commands.append('#python %s/201_use_rate.py -i match_logs.list -o use_rate.stat.tsv -clean %s/00.raw_reads/qc_stat.tsv' % (bin_dir, main_dir))
    commands.append('#rm match_logs.list')
    return work_dir, commands
def qc_prepare(batch, sample_list, dir, host, type):
    """Create the per-batch QC directories and normalise the QC options.

    ``dir`` and ``host`` arrive as ``"<flag> <value>"`` strings; only the
    second whitespace-separated token is kept. One ``<dir>/<stage>/<batch>``
    directory per QC stage and a ``<dir>/shell`` directory are requested
    through the project ``mkdir`` helper, and the sample list is copied
    into the first stage directory as ``<batch>_sample.list``.

    :param batch: batch name used as the innermost directory name.
    :param sample_list: path of the sample list file.
    :param dir: option string whose second token is the output directory.
    :param host: option string whose second token is the host setting.
    :param type: ``'--type 33'`` selects ``'-y'``; anything else ``''``.
    :return: ``None`` when ``sample_list`` is empty or does not exist,
        otherwise the tuple ``(sample_list, sub_dir, type, scr_dir, dir,
        type, host)`` (``type`` appears twice, as callers expect).
    """
    sub_dir = []
    dir = dir.split()[1]
    host = host.split()[1]
    # Bail out when there is no usable sample list. The original combined
    # the two tests with ``and``, so a non-empty path to a missing file
    # slipped through and only failed later in the ``cp`` call.
    if not sample_list or not os.path.exists(sample_list):
        return
    scr_dir = os.path.dirname(os.path.abspath(__file__))
    # NOTE(review): ``host`` is a non-empty token here, so the else branch
    # looks unreachable -- confirm how "no host" is meant to be passed.
    if host:
        selects = ['00.raw_reads', '01.fastqc', '02.rmadaptor', '03.qc', '04.rmhost', '05.clean_reads']
    else:
        selects = ['00.raw_reads', '01.fastqc', '02.rmadaptor', '03.qc']
    for name in selects:
        stage_dir = '%s/%s/%s' % (dir, name, batch)
        mkdir(stage_dir)
        sub_dir.append(stage_dir)
    mkdir('%s/shell' % dir)
    type = '-y' if type == '--type 33' else ''
    os.system('cp -f %s %s/%s_sample.list' % (sample_list, sub_dir[0], batch))
    return sample_list, sub_dir, type, scr_dir, dir, type, host
def assembly_soapdenove(config, name):
    """Collect the post-assembly commands of the soapdenove method.

    The work directory is
    ``<dirname(config)>/<name>/preprocess_for_assembly/soapdenove``; only
    its ``histogram`` sub-directory is requested through the project
    ``mkdir`` helper here. Commands are gathered only, not executed.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: tuple ``(work_dir, commands)``.
    """
    print(gettime("start 03.assembly soapdenove method"))
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s/preprocess_for_assembly/soapdenove' % (main_dir, name)

    # Scaftig extraction and selection of the best assembly.
    commands = [
        "## best contigs",
        'ls assembly/*/*/*scafSeq |while read a; do perl %s/scaftigs.pl $a 500 ${a%%.*}.scaftigs.fna ${a%%.*}.scaftigs.stat; done'
        % bin_dir,
        "/data_center_03/USER/zhongwd/bin/list assembly/*/* >%s/list.txt"
        % work_dir,
        "python %s/best_scaftigs_selecter.py -i %s/list.txt -o %s/best_scaftigs"
        % (bin_dir, work_dir, work_dir),
        "rm %s/list.txt" % work_dir,
        "/data_center_03/USER/zhongwd/bin/list best_scaftigs/*stat | perl %s/stat.pl >  %s/scaftigs.best.stat.tsv"
        % (bin_dir, work_dir),
        "## histogram",
    ]

    mkdir("%s/histogram/" % work_dir)
    # Length histograms, then compressed uploads and the scaftig list.
    commands += [
        "ls best_scaftigs/*.scaftigs.fna | sed 's#best_scaftigs/\(.*\).fna#\\1#g' | while read a; do lengthfasta best_scaftigs/$a.fna >histogram/$a.length; done",
        "ls histogram/*.scaftigs.length |while read a; do Rscript %s/scaftigs_length.R $a ${a%%.*}.histogram.pdf; done"
        % bin_dir,
        "ls histogram/*.pdf |while read a; do convert -density 300 $a ${a%%.*}.png; done",
        "## upload",
        "ls best_scaftigs/*fna |while read a ; do gzip -c $a >${a%%.*}.fna.gz; done",
        "md5sum best_scaftigs/*.gz > best_scaftigs/scaftigs.md5",
        'ls best_scaftigs/*scaftigs.fna | while read a;do b=${a##*/};echo -e "${b%%.*}\\t`pwd $a`/$a";done > ../../scaftigs.list',
    ]

    print(gettime("end 03.assembly"))
    return work_dir, commands
def use_megahit_version(config, name):
    """Collect the assembly commands of the megahit method.

    ``<dirname(config)>/<name>/preprocess_for_assembly/megahit`` is
    requested through the project ``mkdir`` helper. Commands are gathered
    only, not executed.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: tuple ``(work_dir, commands)``.
    """
    print(gettime('start create megahit step script'))
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s/preprocess_for_assembly/megahit' % (main_dir, name)
    mkdir(work_dir)
    commands = [
        "## assembly",
        "cp %s/01.clean_reads/clean_reads.list %s" % (main_dir, work_dir),
        # Reference: /data_center_11/Project/wenpp/01.wujianrong_20180822/03.assembly/assembly_megahit
        "perl %s/megahit_shell_maker.pl -l clean_reads.list -d %s"
        % (bin_dir, work_dir),
        "nohup /data_center_03/USER/zhongwd/bin/qsge --queue neo.q --memery 30G --jobs 2 --lines 1 --prefix megahit shell/assembly.sh &",
    ]
    print(gettime("end assembly_pre"))
    return work_dir, commands
def use_soapdenove_method(config, name):
    """Collect the assembly commands of the soapdenove method.

    ``<dirname(config)>/<name>/preprocess_for_assembly/soapdenove`` is
    requested through the project ``mkdir`` helper. The ``ins_list`` option
    of the ``[param]`` section is passed to the shell-maker script.
    Commands are gathered only, not executed.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: tuple ``(work_dir, commands)``.
    """
    print(gettime('start create soapdenove step script'))
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s/preprocess_for_assembly/soapdenove' % (main_dir, name)
    mkdir(work_dir)

    parser = ConfigParser()
    parser.read(config)
    insert_sizes = parser.get("param", "ins_list")

    commands = [
        "## assembly",
        "cp %s/01.clean_reads/clean_reads.list %s/" % (main_dir, work_dir),
        "perl %s/soapdenovo_shell_maker.pl -l clean_reads.list -i %s -minkmer 51 -maxkmer 63 -b 4 -d %s/"
        % (bin_dir, insert_sizes, work_dir),
        "nohup /data_center_03/USER/zhongwd/bin/qsge --queue big.q:all.q:all.q:all.q --memery 100G:5G:10G:3G --jobs 2 --lines 4 --prefix AS shell/assembly.sh &",
    ]
    print(gettime("end assembly_pre"))
    return work_dir, commands
def eggnog(config, name):
    """Collect the annotation and per-group commands of ``08.eggnog``.

    ``<dirname(config)>/<name>`` is requested through the project ``mkdir``
    helper. The first commands merge the blat results and build the eggNOG
    annotation tables. When ``parse_group`` reports at most 10 samples in
    total for ``[param] sample_name``, per-sample count commands are added.
    One block of commands is then added for every entry of
    ``[param] group``. Nothing is executed here.

    :param config: path of the pipeline configuration file.
    :param name: name of the step directory, located next to ``config``.
    :return: list of shell command strings.
    """
    # The opening message used to read "end 08.eggnog", so the log showed
    # two "end" lines and no "start" line for this step.
    print(gettime("start 08.eggnog"))
    commands = []
    work_dir = '%s/%s' % (os.path.dirname(config), name)
    mkdir(work_dir)
    commands.append("rm %s/blat/all.m8" % work_dir)
    commands.append("cat %s/blat/* > %s/blat/all.m8" % (work_dir, work_dir))
    commands.append(command_default + "python %s/701_pick_blast_m8.py -i %s/blat/all.m8 -o %s/eggnog.m8" %
                    (bin_kegg_default_dir, work_dir, work_dir))
    commands.append(command_default + "perl %s/03_get_annot_info.pl %s/eggnog.m8 /data_center_02/Database/eggNOGv4.0/all.members.txt /data_center_02/Database/eggNOGv4.0/all.description.txt /data_center_02/Database/eggNOGv4.0/all.funccat.txt %s/eggnog.m8.tab" %
                    (bin_eggnog_default_dir, work_dir, work_dir))
    commands.append("perl %s/04_get_count.pl %s/eggnog.m8.tab /data_center_02/Database/eggNOGv4.0/eggnogv4.funccats.txt %s/eggnog.tab" %
                    (bin_eggnog_default_dir, work_dir, work_dir))
    commands.append(command_default + "perl /data_center_07/Project/RY2015K16A01-1/08.eggnog/bin/eggnog.annotation.pl < %s/eggnog.m8.tab > %s/eggnog.anno.tsv" %
                    (work_dir, work_dir))

    # Get the group names (same alternation as before, as a raw string).
    config_gene = ConfigParser()
    config_gene.read(config)
    group = re.split(r"\s+|\t|,\s*|,\t+", config_gene.get("param", "group"))
    sample_names = config_gene.get("param", "sample_name")
    sample_num_in_groups, min_sample_num_in_groups, sample_num_total, group_num = parse_group(
        sample_names)

    # Small projects additionally get per-sample eggNOG counts.
    if sample_num_total <= 10:
        mkdir("%s/samples" % work_dir)
        commands.append("cut -f 1 %s/../01.clean_reads/clean_reads.list | while read a ; do cut -f 1 %s/../06.gene_profile/alignment/$a/$a.gene.abundance > %s/samples/$a.gene.list; done" %
                        (work_dir, work_dir, work_dir))
        commands.append("ls %s/samples/*gene.list | sed 's/.gene.list//g'|while read a; do perl %s/04_get_countlist.pl %s/eggnog.m8.tab /data_center_02/Database/eggNOGv4.0/eggnogv4.funccats.txt $a.gene.list $a.eggnog.tab;done" %
                        (work_dir, bin_eggnog_default_dir, work_dir))
        commands.append("ls %s/samples/*.eggnog.tab | sed 's/.eggnog.tab//g' | while read a;do cut -f 3,4 $a.eggnog.tab > $a.eggnog.count.tab; done" %
                        (work_dir))
        commands.append("ls %s/samples/*.eggnog.count.tab | /data_center_03/USER/zhongwd/bin/profile - > %s/eggnog.count.tab" %
                        (work_dir, work_dir))
        commands.append("Rscript /data_center_04/Projects/pichongbingdu/pair_reads/08.eggnog/NOG.R %s/eggnog.count.tab" %
                        work_dir)

    for subgroup_name in group:
        subgroup = '%s/material/%s_group.list' % (os.path.dirname(config),
                                                  subgroup_name)
        work_dir_01 = "%s/group/%s/" % (work_dir, subgroup_name)
        mkdir(work_dir_01)
        commands.append(
            "## ----------------------------------%s----------------------" %
            (subgroup_name))
        commands.append("cd %s; perl /data_center_06/Project/pracrice/yehaocheng_20160120/08.eggnog/bin/profile2list.pl %s %s/../06.gene_profile/gene.profile; cd -" %
                        (work_dir_01, subgroup, work_dir))
        commands.append("ls %s/*gene.list | sed 's/.gene.list//g'|while read a; do perl %s/04_get_countlist.pl %s/eggnog.m8.tab /data_center_02/Database/eggNOGv4.0/eggnogv4.funccats.txt $a.gene.list $a.eggnog.tab;done" %
                        (work_dir_01, bin_eggnog_default_dir, work_dir))
        commands.append("ls %s/*.eggnog.tab | sed 's/.eggnog.tab//g' | while read a;do cut -f 3,4 $a.eggnog.tab > $a.eggnog.count.tab; done" %
                        (work_dir_01))
        commands.append("ls %s/*.eggnog.count.tab | /data_center_03/USER/zhongwd/bin/profile - > %s/eggnog.count.tab" %
                        (work_dir_01, work_dir_01))
        commands.append("cd %s;Rscript /data_center_04/Projects/pichongbingdu/pair_reads/08.eggnog/NOG.R eggnog.count.tab;cd -" %
                        (work_dir_01))
        commands.append("convert -density 300 %s/NOG.pdf %s/NOG.png" %
                        (work_dir_01, work_dir_01))
    print(gettime("end 08.eggnog"))
    return commands
Exemple #27
0
def ardb(config, name):
    """Build the shell command list for the 09.ardb step.

    Creates ``<dirname(config)>/<name>`` and the per-group output
    directories, then collects the command lines as strings. The commands
    themselves are only collected here, not run.

    Args:
        config: Path to the pipeline config file. Its ``[param] group``
            option lists the group names (whitespace and/or comma separated).
        name: Name of the step directory created next to ``config``.

    Returns:
        list of str: shell command lines and ``#`` comment lines, in order.
    """
    print(gettime("start 09.ardb"))
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s' % (main_dir, name)
    mkdir(work_dir)

    commands = [
        "## blat mapping",
        "cat blat/* > all.m8",
        "pick_blast_m8 all.m8 > ardb.m8",
        "cut -f 2 ardb.m8 | search - /data_center_03/Project/AS/16_ARDB/old/ardbAnno1.0_modify_db07/tabs/ardb.tab | paste ardb.m8 - | cut -f 1,13-  > gene2ardb.tsv",
        "classprofile -i gene2ardb.tsv -p ../06.gene_profile/gene.profile -f 3 > ardb.type.profile",
        "classprofile -i gene2ardb.tsv -p ../06.gene_profile/gene.profile -f 4 > ardb.class.profile",
        "Rscript /data_center_07/Project/RY2015K16A01-1/09.ardb/bin/ardb.barplot.r\n",
        "(echo -e 'Gene ID\tProtein name\tType\tClass\tDescription'; cat gene2ardb.tsv) > ardb.anno.tsv",
    ]

    # groups
    config_gene = ConfigParser()
    config_gene.read(config)
    raw_groups = config_gene.get("param", "group")
    # re.split yields empty tokens for leading/trailing or doubled separators
    # (e.g. "a ,b"); drop them so no bogus "_group.list" path is generated.
    group = [g for g in re.split(r"\s+|\t|,\s*|,\t+", raw_groups) if g]

    for subgroup_name in group:
        subgroup = '%s/material/%s_group.list' % (main_dir, subgroup_name)
        group_dir = "%s/group/%s" % (work_dir, subgroup_name)
        # Only the number of groups is needed below.
        _, _, _, group_num = parse_group(subgroup)
        commands.append(
            "## ----------------------------------%s----------------------"
            % subgroup_name)

        # Differential analysis, once per annotation level. Each level must
        # read its own profile (ardb.class.profile / ardb.type.profile).
        for step, level in (("01", "class"), ("02", "type")):
            diff_dir = "%s/%s.%s_diff/" % (group_dir, step, level)
            mkdir(diff_dir)
            commands.append("#%s diff %s" % (step, level))
            commands.append(
                command_default
                + "python %s/t08_diff.py -i %s/ardb.%s.profile -g %s -o %s"
                % (tool_default_dir, work_dir, level, subgroup, diff_dir))
            commands.append(
                command_default
                + "python %s/t09_diff_boxplot.py -i %s/diff.marker.filter.profile.tsv -p %s/diff.marker.filter.tsv -g %s -o %s/diff_boxplot/"
                % (tool_default_dir, diff_dir, diff_dir, subgroup, diff_dir))

        commands.append("#03 function_barplot")
        for level, title in (("class", "Class"), ("type", "Type")):
            pdf_file = "%s/ardb.%s.pdf" % (group_dir, level)
            commands.append(
                command_default
                + "Rscript %s/710_level1_barplot.R %s/ardb.%s.profile %s %s %s"
                % (bin_ardb_default_dir, work_dir, level, pdf_file, title,
                   subgroup))
            commands.append("convert -density 300 %s %s/ardb.%s.png"
                            % (pdf_file, group_dir, level))

        # These two plots are only generated for two-group comparisons.
        if group_num == 2:
            for label, stem in (("#04 dimond swarm", "dimond_swarm"),
                                ("#05 top ardb", "top_ardb")):
                commands.append(label)
                commands.append(
                    command_default
                    + "Rscript %s/%s.R %s/ardb.type.profile %s %s/%s.pdf"
                    % (bin_ardb_default_dir, stem, work_dir, subgroup,
                       group_dir, stem))
                commands.append("convert -density 300 %s/%s.pdf %s/%s.png"
                                % (group_dir, stem, group_dir, stem))

    print(gettime("end 09.ardb"))
    return commands
Exemple #28
0
                        help="set the output dir")
    parser.add_argument('--with_boxplot', dest='with_boxplot', action='store_true',
                        help="plot boxplot")
    parser.add_argument('--without_boxplot', dest='with_boxplot', action='store_false',
                        help="unplot boxplot")
    parser.add_argument('--two_legend', dest='two_legend', action = 'store_true',default=False,
                        help="two_legend")
    parser.set_defaults(with_boxplot=True)
    args = parser.parse_args()
    params = vars(args)
    return params


if __name__ == '__main__':
    # Script entry point: parse the CLI options, make sure the output
    # directory exists and pick the R template used for the OTU PCA plot.
    params = read_params(sys.argv)
    mkdir(params['out_dir'])
    pdf_file = params['out_dir'] + '/otu_pca.pdf'
    # NOTE(review): png_file is not referenced in the visible part of this
    # block - presumably used further down; confirm.
    png_file = params['out_dir'] + '/otu_pca.png'
    # NOTE(review): this rebinds the builtin name `vars` at module level.
    # read_params() calls vars(args), which only works because it runs
    # before this assignment.
    vars = {'otu_profile': params['otu_table'],
            'group_file': params['group'],
            'pdf_file': pdf_file}

    r_job = Rparser()
    # Select the R template from the --two_legend / --with_boxplot flags.
    if params['two_legend']:
        # NOTE(review): both branches open the same template, so
        # --without_boxplot has no effect when --two_legend is set - confirm
        # whether a separate two-legend template without boxplot is intended.
        if params['with_boxplot']:
            r_job.open(const.Rscript + '/02_tax_pca_two.R')
        else:
            r_job.open(const.Rscript + '/02_tax_pca_two.R')
    else:
        if params['with_boxplot']:
            r_job.open(const.Rscript + '/02_tax_pca_with_boxplot.R')
Exemple #29
0
def taxon(config, sh_default_file, outpath, name):
    """Prepare the taxonomy step and return its shell command list.

    Side effects performed immediately: writes ``config.yaml`` into the
    directory of ``config`` (based on ``const.config_yaml``), copies the
    Snakefile and ``cluster.yaml`` there, and creates the output directories.
    The analysis commands are only collected as strings.

    Args:
        config: Path to the pipeline config file. Its directory is the work
            directory and its ``[param] group`` option lists the group files
            (whitespace separated).
        sh_default_file: Unused here; kept for interface compatibility.
        outpath: Unused here; kept for interface compatibility.
        name: Unused here; kept for interface compatibility.

    Returns:
        list of str: shell command lines and ``#`` comment lines, in order.
    """
    print("start taxon :%s s" % time())
    work_dir = os.path.dirname(config)
    pyscript_dir = const.PYscript
    profile_tool = "/data_center_01/pipeline/huangy/metagenome/perlscript/02_profile"

    # Update config.yaml. safe_load is used on purpose: the template is a
    # plain settings mapping and must not be able to construct arbitrary
    # Python objects (bare yaml.load without a Loader is also rejected by
    # recent PyYAML releases).
    with open(const.config_yaml) as f:
        yl = yaml.safe_load(f)
    yl["work_dir"] = "%s/../" % work_dir
    yl["clean_reads_list"] = "02.tax/clean_read.list"
    with open('%s/config.yaml' % work_dir, "w") as fout:
        yaml.dump(yl, fout, default_flow_style=False)
    os.system("cp %s %s/Snakefile" % (const.snakemake, work_dir))
    os.system("cp %s %s/cluster.yaml" % (const.cluster_yaml, work_dir))
    for log_dir in ("log/", "log/align/", "log/abund/"):
        mkdir("%s/%s" % (work_dir, log_dir))

    commands = [
        "## calculate abundance",
        "source activate gutbio",
        # NOTE(review): this --config override ("02.taxon/clean_reads_list")
        # differs from the value written to config.yaml above
        # ("02.tax/clean_read.list") - confirm which path is correct.
        "snakemake --cluster-config cluster.yaml --cluster 'qsub -o {cluster.qsublog} -e {cluster.qsublog} -l vf={cluster.vf}' -j 10 --nolock --config clean_reads_list=\"02.taxon/clean_reads_list\"",
        "source deactivate",
        "## form species profile",
        "ls alignment/*/*species.abundance >list",
        "python /data_center_01/pipeline/huangy/metagenome/pyscript/02_taxnomy.py -i list",
        "rm list",
    ]

    mkdir("%s/profile/" % work_dir)
    commands.extend([
        "ls alignment/*/*species.abundance | %s - > profile/species.profile" % profile_tool,
        "ls alignment/*/*species.abundance2 | %s - > profile/species.profile2" % profile_tool,
        "ls alignment/*/*genus.abundance   | %s - > profile/genus.profile" % profile_tool,
        "ls alignment/*/*class.abundance   | %s - > profile/class.profile" % profile_tool,
        "ls alignment/*/*family.abundance  | %s - > profile/family.profile" % profile_tool,
        "ls alignment/*/*order.abundance   | %s - > profile/order.profile" % profile_tool,
        "ls alignment/*/*phylum.abundance  | %s - > profile/phylum.profile" % profile_tool,
        "ls alignment/*/*all.abundance | %s - > profile/all.profile" % profile_tool,
    ])

    # otu_table_L<N>.txt = header line + the profile of the matching rank
    # (L2 phylum ... L6 genus, L7 species).
    level_profiles = (
        (2, "phylum.profile"),
        (3, "class.profile"),
        (4, "order.profile"),
        (5, "family.profile"),
        (6, "genus.profile"),
        (7, "species.profile2"),
    )
    for level, _ in level_profiles:
        commands.append(
            "echo '# Constructed from biom file' >profile/otu_table_L%d.txt"
            % level)
    for level, profile in level_profiles:
        commands.append("cat profile/%s >> profile/otu_table_L%d.txt"
                        % (profile, level))

    commands.extend([
        "## use rate",
        "#mkdir use_rate",
        "#ls alignment/*/*MATCH |while read a; do echo \"perl /data_center_03/USER/zhongwd/rd/11_taxonomy_V2.0/bin/stat.pl < $a > $a.stat\" ;done > use_rate/stat.sh",
        "#nohup /data_center_03/USER/zhongwd/bin/qsge --queue all.q --resource vf=1G --maxjob 10 --jobprefix ST --lines 1 --getmem use_rate/stat.sh &",
        "#ls alignment/*/*MATCH.stat | perl /data_center_03/USER/zhongwd/rd/11_taxonomy_V2.0/bin/stat_tab.pl - ../00.raw_reads/qc_final.stat.tsv > use_rate/stat.tsv",
    ])

    config_gene = ConfigParser()
    config_gene.read(config)
    # Drop empty tokens produced by leading/trailing whitespace so that no
    # group is processed with an empty path.
    group = [g for g in re.split(r"\s+", config_gene.get("param", "group")) if g]

    mkdir("%s/" % work_dir)
    mkdir("%s/group/" % work_dir)
    commands.append("## 00.piechart     need finish")
    mkdir("%s/group/00.piechart" % work_dir)
    # Raw strings: the backslashes belong to the sed expression.
    commands.append(r"ls alignment/*/*species.abundance | sed 's/alignment\/\(.*\)\/.*species.abundance/\1/g' | while read a ; do perl /data_center_03/USER/zhongwd/rd/11_taxonomy_V2.0/test/pieplot/pie.pl < alignment/$a/$a.species.abundance > group/00.piechart/$a.species.pie.svg;done")
    commands.append(r"ls alignment/*/*genus.abundance   | sed 's/alignment\/\(.*\)\/.*genus.abundance/\1/g'   | while read a ; do perl /data_center_03/USER/zhongwd/rd/11_taxonomy_V2.0/test/pieplot/pie.pl < alignment/$a/$a.genus.abundance   > group/00.piechart/$a.genus.pie.svg;done")

    commands.append("## 03.accum")
    mkdir("%s/03.accum_share" % work_dir)
    commands.extend([
        "ln -s ../profile/genus.profile 03.accum_share/",
        "ln -s ../profile/species.profile 03.accum_share/",
        "perl /data_center_03/USER/zhongwd/rd/Finish/07_acumm_share_curve/Accumulated_Shared_Curve.pl -p 03.accum_share/genus.profile -c genus -t 100",
        "perl /data_center_03/USER/zhongwd/rd/Finish/07_acumm_share_curve/Accumulated_Shared_Curve.pl -p 03.accum_share/species.profile -c species -t 100",
    ])

    commands.append("## 04.rarecurve")
    mkdir("%s/04.rarecurve" % work_dir)
    commands.append(r"#list alignment/*/*MATCH > 04.rarecurve/match.list; sed 's/.*alignment\/\(.*\)\/.*MATCH/\1/g' 04.rarecurve/match.list | paste - 04.rarecurve/match.list > 04.rarecurve/match.list.tmp; mv -f 04.rarecurve/match.list.tmp 04.rarecurve/match.list")
    commands.append("#nohup perl /data_center_03/USER/zhongwd/rd/05_rarecurve/RareCurve/RareCurve.pl -s clean_reads_list -m 04.rarecurve/match.list -d 04.rarecurve &")

    commands.append("## 06.ternaryplot")
    mkdir("%s/06.ternaryplot" % work_dir)
    commands.append("Rscript /data_center_01/pipeline/huangy/metagenome/Rscript/02_ternary.R profile/species.profile sample.list 06.ternaryplot/species.ternary.pdf species")
    commands.append("Rscript /data_center_01/pipeline/huangy/metagenome/Rscript/02_ternary.R profile/genus.profile   sample.list 06.ternaryplot/genus.ternary.pdf   genus")

    commands.append("## 07.treeplot")
    mkdir("%s/07.treeplot" % work_dir)
    commands.append("cut -f 1 clean_reads_list | while read a; do mkdir 07.treeplot/$a; perl /data_center_03/USER/zhongwd/temp/0106/tree/a.pl < alignment/$a/$a.species.abundance > 07.treeplot/$a/test.info 2> 07.treeplot/$a/test.tax; done")
    commands.append("cut -f 1 clean_reads_list | while read a; do cd 07.treeplot/$a; perl /data_center_03/USER/zhongwd/temp/0106/tree/zwd_newwick.pl < test.tax > test.tre; ~/anaconda_ete/bin/python /data_center_03/USER/zhongwd/temp/0106/tree/plottre.py; cd -; done")

    commands.append("## 08.cluster")
    mkdir("%s/08.cluster" % work_dir)
    commands.append("Rscript /data_center_03/USER/zhongwd/rd/11_taxonomy_V2.0/test/barplot/bartreeplot.r profile/species.profile sample.list 08.cluster/species.clust.pdf")

    bar_plot = "/data_center_01/pipeline/16S_ITS_pipeline_v3.0/script/02_bar_plot.py"
    beta_div = "/data_center_01/pipeline/huangy/metagenome/perlscript/02_Beta_diversity.pl"
    for subgroup in group:
        _, subgroup_name, _ = get_name(subgroup)
        group_dir = "%s/group/%s" % (work_dir, subgroup_name)
        mkdir(group_dir)

        commands.append("## 01.barplot      need finish")
        mkdir("%s/01.barplot" % group_dir)
        commands.append("%s -t %s/profile/ -o %s/01.barplot/ -g %s "
                        % (bar_plot, work_dir, group_dir, subgroup))
        commands.append("%s -t %s/profile/ -o %s/01.barplot/ -g %s --level 7 "
                        % (bar_plot, work_dir, group_dir, subgroup))

        commands.append("## 02.core")
        mkdir("%s/02.core" % group_dir)
        # Each rank reads its own table: L7 is built from the species
        # profile and L6 from the genus profile (see level_profiles above).
        for rank, level in (("species", 7), ("genus", 6)):
            commands.append(
                "python %s/02_venn.py -i %s/profile/otu_table_L%d.txt -o %s/02.core/%s/ -g %s "
                % (pyscript_dir, work_dir, level, group_dir, rank, subgroup))

        commands.append("## 05.top_boxplot")
        mkdir("%s/05.top_boxplot" % group_dir)
        commands.append(
            "python %s/02_top.py -i %s/profile/all.profile -g %s -o %s/05.top_boxplot/"
            % (pyscript_dir, work_dir, subgroup, group_dir))

        commands.append("## 09.pca")
        mkdir("%s/09.pca" % group_dir)
        # NOTE(review): both ranks write into the same 09.pca directory; if
        # 02_otu_pca.py uses fixed output names the genus run overwrites the
        # species result - confirm.
        for rank in ("species", "genus"):
            commands.append(
                "python %s/02_otu_pca.py -i %s/profile/%s.profile -g %s -o %s/09.pca --with_boxplot"
                % (pyscript_dir, work_dir, rank, subgroup, group_dir))

        commands.append("## 11.anosim; 13.pcoa; 14.nmds")
        mkdir("%s/11-14.beta_div" % group_dir)
        mkdir("%s/11-14.beta_div/species" % group_dir)
        mkdir("%s/11-14.beta_div/genus" % group_dir)
        for rank in ("species", "genus"):
            commands.append(
                "cd group/%s/11-14.beta_div/%s; perl %s -p ../../../../profile/%s.profile -g %s -m bray -r; cd -"
                % (subgroup_name, rank, beta_div, rank, subgroup))

        mkdir("%s/15.LEfSe" % group_dir)
        commands.append(
            "python /data_center_01/pipeline/16S_ITS_pipeline_v3.0/script/05_filter_abundance.py  -i %s/profile/species.profile -g %s -o %s/15.LEfSe/ --cut_off 1e-5"
            % (work_dir, subgroup, group_dir))
        commands.append(
            "python %s/02_LEfSe.py -i %s/15.LEfSe/species.profile -l /data_center_03/USER/huangy/soft/LEfSe_lzb -g %s -o %s/15.LEfSe/ --LDA 2"
            % (pyscript_dir, group_dir, subgroup, group_dir))

    print("end taxon :%s s" % time())
    return commands
Exemple #30
0
def group(config, name):
    """Write a ``work.sh`` with the group-level analysis for every group.

    For each group named in the ``[param] group`` option of ``config`` this
    creates ``<dirname(config)>/<name>/group_analysis/<group>/`` with one
    sub-directory per analysis step, copies
    ``material/<group>_group.list`` there as ``group.list`` and writes the
    command lines to ``work.sh`` in that directory. The commands use paths
    relative to the group directory and are not executed here.

    Args:
        config: Path to the pipeline config file.
        name: Name of the step directory next to ``config``.

    Returns:
        None.
    """
    main_dir = os.path.dirname(config)
    work_dir = '%s/%s/group_analysis' % (main_dir, name)
    pro_dir = '%s/%s/taxon_profile' % (main_dir, name)
    mkdir(work_dir)

    config_gene = ConfigParser()
    config_gene.read(config)
    # Empty tokens (doubled or trailing separators) are dropped.
    subgroups = [g for g in re.split(r'\s+|\t|,\s*|,\t+',
                                     config_gene.get('param', 'group')) if g]

    step_dirs = (
        '01.barplot', '02.venn_flower', '03.top_boxplot', '04.ternaryplot',
        '05.top_barplot', '06.pca', '07.pcoa', '08.nmds', '09.anosim',
        '10.adonis', '11.mrpp', '12.diff', '13.lefse/',
    )

    for subgroup in subgroups:
        sub_dir = '%s/%s' % (work_dir, subgroup)
        group_list = '%s/material/%s_group.list' % (main_dir, subgroup)
        mkdir(sub_dir)
        os.system('cp %s %s/group.list' % (group_list, sub_dir))
        # Only the number of groups drives the branching below.
        _, _, _, group_num = parse_group(group_list)
        for step in step_dirs:
            mkdir('%s/%s' % (sub_dir, step))

        grp_sh = [
            '## 01.barplot Need finish',
            '%s/g01_barplot.py -t %s/ -o 01.barplot/ -g group.list' % (bin_dir, pro_dir),
            '%s/g01_barplot.py -t %s/ -o 01.barplot/ -g group.list --level 7' % (bin_dir, pro_dir),
            '## 02.venn_flower',
        ]
        if 2 <= group_num <= 30:
            grp_sh.append('for i in phylum genus species; do perl %s/g02_flower.pl %s/$i.profile group.list 02.venn_flower/$i/; done' % (bin_dir, pro_dir))
        else:
            grp_sh.append('## sample too much')

        grp_sh.append('## 03.top_boxplot')
        grp_sh.append('python %s/g03_top_boxplot.py -i %s/all.profile -g group.list -o 03.top_boxplot/' % (bin_dir, pro_dir))

        grp_sh.append('## 04.ternaryplot')
        if group_num == 2:
            grp_sh.append('for i in phylum genus species; do Rscript %s/g04_ternary.R %s/$i.profile group.list 04.ternaryplot/$i.ternary.pdf $i; done' % (bin_dir, pro_dir))
        elif group_num == 3:
            # t13_ternary.py is a Python script, so it is run with python,
            # as in the 4-5 group branch below.
            grp_sh.append('for i in phylum genus species; do python %s/t13_ternary.py -i %s/$i.profile -g group.list -o 04.ternaryplot/$i -c $i; done' % (tool_dir, pro_dir))
        elif 3 < group_num < 6:
            grp_sh.append('python %s/g04_ternary_sub_group.py -g group.list -o 04.ternaryplot/' % bin_dir)
            grp_sh.append('for i in `ls 04.ternaryplot/`;do for j in phylum genus species;do if [ -d 04.ternaryplot/$i ] ;then python %s/t13_ternary.py -i %s/$j.profile -g 04.ternaryplot/$i/sub_group.list -o 04.ternaryplot/$i/$j -c $j;fi;done;done' % (tool_dir, pro_dir))

        grp_sh.extend([
            '## 05.top_barplot',
            'for i in phylum genus species; do python %s/t10_sample_clustering.py -i %s/$i.profile -g group.list -o 05.top_barplot/$i/ -t $i; done' % (tool_dir, pro_dir),
            '## 06.pca',
            'for i in phylum genus species; do python %s/t01_pca.py -i %s/$i.profile -g group.list -o 06.pca/$i/ --with_boxplot; done' % (tool_dir, pro_dir),
            '## 07.pcoa',
            'for i in phylum genus species; do python %s/t02_pcoa.py -i %s/$i.profile -g group.list -o 07.pcoa/$i/ --with_boxplot; done' % (tool_dir, pro_dir),
            '## 08.nmds',
            'for i in phylum genus species; do python %s/t03_nmds.py -i %s/$i.profile -g group.list -o 08.nmds/$i/; done' % (tool_dir, pro_dir),
            '## 09.anosim',
            'for i in phylum genus species; do python %s/t04_anosim.py -i %s/$i.profile -g group.list -o 09.anosim/$i/; done' % (tool_dir, pro_dir),
            '## 10.adonis',
            'for i in phylum genus species; do python %s/t12_adonis_pca.py -i %s/$i.profile -g group.list -o 10.adonis/$i/; done' % (tool_dir, pro_dir),
            'for i in phylum genus species; do python %s/t12_adonis_pcoa.py -i %s/$i.profile -g group.list -o 10.adonis/$i/; done' % (tool_dir, pro_dir),
            '## 11.mrpp',
            'for i in phylum genus species; do python %s/t05_mrpp.py -i %s/$i.profile -g group.list -o 11.mrpp/$i/; done' % (tool_dir, pro_dir),
            '## 12.diff',
            'for i in phylum genus species; do python %s/t08_diff.py -i %s/$i.profile -g group.list -o 12.diff/$i; done' % (tool_dir, pro_dir),
            'for i in phylum genus species; do Rscript %s/t08_new_diff.R 12.diff/$i/diff.marker.filter.profile.tsv 12.diff/$i/diff.marker.filter.tsv group.list 12.diff/$i/${i}_diff.pdf n add;convert -density 300 12.diff/$i/${i}_diff.pdf 12.diff/$i/${i}_diff.png; done' % tool_dir,
            '## 13.lefse',
            'for i in phylum genus species; do /data_center_03/USER/huangy/soft/MAIN/anaconda2/bin/python2.7 %s/t11_lefse.py -i %s/$i.profile -l /data_center_03/USER/huangy/soft/LEfSe_lzb -g group.list -o 13.lefse/$i --LDA 2; done' % (tool_dir, pro_dir),
        ])

        with open('%s/work.sh' % sub_dir, 'w') as outf:
            outf.write('\n'.join(grp_sh))
Exemple #31
0
def samples(config, name):
    """Write ``work.sh`` with the per-sample analysis commands.

    Creates ``<dirname(config)>/<name>/samples_nalysis`` with one
    sub-directory per analysis step and writes the collected command lines
    to ``work.sh`` in that directory. The commands are not executed here.

    Args:
        config: Path to the pipeline config file. The ``[param]`` options
            ``sample_name`` and ``alpha_group`` are read from it.
        name: Name of the step directory next to ``config``.

    Returns:
        None.
    """
    main_dir = os.path.dirname(config)
    # NOTE(review): "samples_nalysis" looks like a typo for
    # "samples_analysis"; left unchanged because other code may rely on the
    # directory name.
    work_dir = '%s/%s/samples_nalysis' % (main_dir, name)
    mkdir(work_dir)
    # Directory that holds the alignment results.
    snakemake_method_dir = '%s/%s/preprocess_for_taxon_profile/snakemake_method' % (main_dir, name)

    config_gene = ConfigParser()
    config_gene.read(config)
    sample_names = config_gene.get('param', 'sample_name')
    alpha_group = config_gene.get('param', 'alpha_group')
    # Only the total sample count is needed below.
    _, _, sample_num_total, _ = parse_group(sample_names)

    commands = [
        '## 00.reads use rate ',
        'ls ../preprocess_for_taxon_profile/*/alignment/*/*.MATCH.logs > %s/list' % work_dir,
        'python %s/201_use_rate_new.py -i %s/list -o %s/use_rate.stat.tsv -clean %s/../../00.raw_reads/qc_rawdata.stat.tsv' % (bin_dir, work_dir, work_dir, work_dir),
        'rm %s/list' % work_dir,
    ]

    commands.append('## 01.piechart Need finish')
    mkdir('%s/01.piechart' % work_dir)
    # "%%" is a literal "%" after formatting: the shell sees ${b%.*}, which
    # strips only the last extension.
    commands.append('for i in phylum genus species; do find .. -name *$i.abundance | while read a; do b=${a##*/}; perl %s/s01_pie.pl <$a >01.piechart/${b%%.*}.pie.svg; done; done' % bin_dir)

    commands.append('## 02.venn_flower')
    venn_dir = '02.venn_flower'
    mkdir('%s/%s' % (work_dir, venn_dir))
    if 2 <= sample_num_total <= 30:
        commands.append('for i in phylum genus species; do perl %s/g02_flower.pl ../taxon_profile/$i.profile %s %s/$i/; done' % (bin_dir, sample_names, venn_dir))
    else:
        commands.append('# sample too much')

    commands.append('## 03.accumulate')
    accum_dir = '03.accum_share'
    pro_dir = '%s/%s/taxon_profile' % (main_dir, name)
    mkdir('%s/%s' % (work_dir, accum_dir))
    commands.append('for i in phylum genus species; do ln %s/$i.profile %s/; done' % (pro_dir, accum_dir))
    commands.append('for i in phylum genus species; do perl %s/Accumulated_Shared_Curve.pl -p %s/$i.profile -c $i -t 100; done' % (bin_dir, accum_dir))

    commands.append('## 04.rarecurve')
    rare_dir = '04.rarecurve'
    mkdir('%s/%s' % (work_dir, rare_dir))
    commands.extend([
        'ls %s/alignment |while read a; do echo "python %s/204_rarefaction.py -i %s/alignment/$a/$a.MATCH -o %s/alignment/$a/ -m 1820252 -b 200 -n $a"; done >%s/04.rarecurve/rarefaction.sh' % (snakemake_method_dir, bin_dir, snakemake_method_dir, snakemake_method_dir, work_dir),
        'sh %s/04.rarecurve/rarefaction.sh' % work_dir,
        'rm %s/04.rarecurve/rarefaction.sh' % work_dir,
        'paste %s/alignment/*/rarefaction.tsv >%s/all.rarefaction.tsv' % (snakemake_method_dir, work_dir),
        'Rscript %s/204_chao1.R %s/all.rarefaction.tsv %s %s/chao1.pdf %s/chao1.png' % (bin_dir, work_dir, alpha_group, rare_dir, rare_dir),
        # The directory is substituted here, outside the literal, so the
        # emitted line no longer carries a raw "%s" and the formatting
        # expression itself. Raw string: backslashes belong to sed.
        r"#list %s/alignment/*/*MATCH > 04.rarecurve/match.list; sed 's/.*alignment\/\(.*\)\/.*MATCH/\1/g' 04.rarecurve/match.list | paste - 04.rarecurve/match.list >04.rarecurve/match.list.tmp; mv -f 04.rarecurve/match.list.tmp 04.rarecurve/match.list" % snakemake_method_dir,
        '#nohup perl /data_center_03/USER/zhongwd/rd/05_rarecurve/RareCurve/RareCurve.pl -s clean_reads_list -m 04.rarecurve/match.list -d 04.rarecurve &',
    ])

    commands.append('## 05.treeplot')
    tree_dir = '05.treeplot'
    mkdir('%s/%s' % (work_dir, tree_dir))
    mkdir('%s/05.treeplot-shell' % work_dir)
    commands.extend([
        'cut -f1 ../../01.clean_reads/clean_reads.list | while read a; do mkdir -p %s/$a; echo "python %s/treeplot.tax.py -i %s/alignment/$a/$a.species.abundance -o %s/$a/"; done > %s/05.treeplot-shell/work.sh' % (tree_dir, bin_dir, snakemake_method_dir, tree_dir, work_dir),
        '/data_center_03/USER/zhongwd/bin/qsge --queue all.q --memery 5G --jobs 13 --prefix py --lines 1 %s/05.treeplot-shell/work.sh' % work_dir,
        'cut -f1 ../../01.clean_reads/clean_reads.list | while read a; do cd %s/$a; perl %s/treeplot.newwick_new.pl test.info test.tax new_test.info>test.tre; python %s/treeplot.plot.py test.tre new_test.info tree.pdf; cd -; done' % (tree_dir, bin_dir, bin_dir),
    ])

    with open('%s/work.sh' % work_dir, 'w') as outf:
        outf.write('\n'.join(commands))
Exemple #32
0
                        print "sample %s no in group" % samples[ind-1]
    return otu_in_group



def write(otu_in_group, outfile):
    """Write one line per group listing its OTU ids in ascending numeric order.

    Each output line is the group name, a tab, the group's OTU ids separated
    by single spaces, and a newline.

    Args:
        otu_in_group: mapping of group name to an iterable of OTU ids. The
            ids are ordered with ``int()`` and joined with ``' '.join``, so
            they must be numeric strings.
        outfile: path of the file to create or overwrite.

    Raises:
        ValueError: if an OTU id cannot be converted with ``int()``.
    """
    with open(outfile, 'w') as fp:
        # ``items()`` and ``key=int`` behave the same on Python 2 and 3;
        # ``iteritems()`` and ``sorted(cmp=...)`` only exist on Python 2.
        for group, otus in otu_in_group.items():
            ordered = sorted(otus, key=int)
            fp.write('%s\t%s\n' % (group, ' '.join(ordered)))


if __name__ == '__main__':
    # Script entry point: collect the OTUs per group, write them to
    # for_plot.txt, then hand the paths to the 02_venn.R template via Rparser.
    params = read_params(sys.argv)
    dirname = params['outputdir']
    mkdir(dirname)
    for_plot = dirname + '/for_plot.txt'
    tiff_file = dirname + '/venn.tiff'
    png_file = dirname + '/venn.png'  # not referenced again in this block

    # Named ``r_vars`` rather than ``vars`` so the builtin ``vars()`` is not
    # shadowed at module level.
    r_vars = {'for_plot': for_plot,
              'tiff_file': tiff_file,
              'group_file': params['group_dir']}

    otu_in_group = read(params['microbial_profile'], params['group'], r_vars)
    write(otu_in_group, for_plot)

    # NOTE(review): presumably Rparser substitutes r_vars into the template
    # and run() executes the written script -- confirm against Rparser.
    r_job = Rparser()
    r_job.open(const.Rscript + '02_venn.R')
    r_job.format(r_vars)
    r_job.write(dirname + '/otu_venn.R')
    r_job.run()
        default="bray",
        help=
        "please set method Dissimilarity index, partial match to manhattan euclidean \
                         canberra bray kulczynski jaccard gower altGower morisita horn\
                          mountford raup binomial chao cao mahalanobis")
    args = parser.parse_args()
    params = vars(args)
    return params


if __name__ == '__main__':
    # Script entry point: render the t12_adonis_pcoa.R template with the
    # command-line parameters, run the resulting script, then pass the PDF
    # and PNG paths to image_trans.
    reload(sys)
    sys.setdefaultencoding('utf8')

    params = read_params(sys.argv)
    tool_default_dir = const.tool_default_dir
    mkdir(params['out_dir'])

    use_method = params["method"]
    # Both images share the same "<out_dir>/<method>_adonis_pcoa" stem.
    plot_stem = params['out_dir'] + "/" + use_method + '_adonis_pcoa'
    pdf_file = plot_stem + '.pdf'
    png_file = plot_stem + '.png'

    loader = FileSystemLoader(tool_default_dir)
    env = Environment(loader=loader, autoescape=False)
    template = env.get_template("t12_adonis_pcoa.R")

    # Values substituted into the R template.
    render_args = {
        "tool_default_dir": tool_default_dir,
        "profile_table": params['profile_table'],
        "group_file": params['group_file'],
        "pdf_file": pdf_file,
        "method": use_method,
    }
    rendered = template.render(**render_args)

    r_script = "%s/adonis_pcoa.R" % params["out_dir"]
    with open(r_script, "w") as script_fh:
        script_fh.write(rendered)

    Rrun(r_script)
    image_trans(pdf_file, png_file)

if __name__ == '__main__':
    # Script entry point: render the corr_heatmap.R template with the
    # command-line parameters, run it with Rscript, then convert the PDF
    # it is told to write into a PNG.

    # Imported here so the block does not depend on the file's import header.
    import subprocess

    reload(sys)
    sys.setdefaultencoding('utf8')
    params = read_params(sys.argv)
    profile_table = params["profile_table"]
    profile_kegg = params["profile_kegg"]
    group_file = params["group"]
    out_dir = params["out_dir"]
    cutoff_p = params["cutoff"]
    cutoff_estimate = params["estimate"]
    level = params["level"]
    # Kept out of the name ``filter`` so the builtin is not shadowed.
    filter_param = params["filter"]
    default_dir = const.pipeline_dir
    pdf_out = "%s/corr_heatmap.pdf" % out_dir
    png_out = "%s/corr_heatmap.png" % out_dir
    mkdir(out_dir)

    env = Environment(loader=FileSystemLoader("%s/../alternative/" %
                                              const.bin_default_dir),
                      autoescape=False)
    template = env.get_template("corr_heatmap.R")
    Rtxt = template.render(groupfile=group_file,
                           taxfile=profile_table,
                           keggfile=profile_kegg,
                           cutoff_p=cutoff_p,
                           cutoff_estimate=cutoff_estimate,
                           default_dir=default_dir,
                           pdf_out=pdf_out,
                           level=level,
                           filter=filter_param)

    r_script = "%s/corr_heatmap.R" % out_dir
    with open(r_script, "w") as fqout:
        fqout.write(Rtxt)

    # Argument lists instead of shell strings: paths containing spaces or
    # shell metacharacters are passed through intact. Exit statuses are
    # ignored, as before; a missing executable now raises OSError instead of
    # only printing a shell error.
    subprocess.call(["Rscript", r_script])
    subprocess.call(["convert", "-density", "300", pdf_out, png_out])
Exemple #35
0
    params = vars(args)
    params['group'] = parse_group_file(params['group'])
    return params


if __name__ == '__main__':
    # Script entry point: keep the `top` rows with the largest row sums from
    # the input table, write them to for_plot.csv, and plot them through the
    # 02_barplot.R template.
    params = read_params(sys.argv)
    outputfile = params['outputfile']
    dirname, filename, suffix = get_name(outputfile)
    inputfile = params['inputfile']
    top = params['top']
    title = params['title']

    # Create the output directory before anything is written into it.
    # NOTE(review): presumably `dirname` from get_name() is this same
    # directory -- confirm against get_name.
    mkdir(os.path.split(outputfile)[0])

    # DataFrame.from_csv is gone from current pandas; read_csv with
    # index_col=0 and parse_dates=True reproduces its defaults.
    data = pd.read_csv(inputfile, sep="\t", index_col=0, parse_dates=True)

    # Rank rows by their total across all columns; the helper column is
    # dropped again so it does not end up in the output table.
    data["sum"] = data.sum(axis=1)
    data = data.sort_values(by="sum", ascending=False)
    del data["sum"]
    # Positional slice: the first `top` rows regardless of the index dtype
    # (the removed `.ix` switched to label slicing for integer indexes).
    data = data.iloc[:top]

    plot_table = "%s/for_plot.csv" % dirname
    data.to_csv(plot_table, sep="\t")

    RscriptDir = const.Rscript
    r_job = Rparser()
    r_job.open("%s/02_barplot.R" % RscriptDir)
    # Named ``r_vars`` rather than ``vars`` so the builtin ``vars()`` is not
    # shadowed at module level.
    r_vars = {"top": top,
              "infile": plot_table,
              "pdf_file": outputfile,
              "title": title}
    r_job.format(r_vars)
    r_job.write("%s/bar_plot.R" % dirname)
    r_job.run()
    image_trans(300,
                "%s/%s.pdf" % (dirname, filename),
                "%s/%s.png" % (dirname, filename))