Python CONFIG Examples, seqkit.CONFIG Python Examples

Example #1

0

Show file

def run_peakanno(project, peak_call, slurm=False, job_file=None):
    """Generate peak-annotation shell commands for peak-called regions.

    Every path matching ``<root_dir>/<project>/*/*<peak_call>*`` is used as a
    peaks directory. Its parent directory is taken as the sample directory,
    whose base name becomes the job name. A ``peakannotate`` directory is
    created inside the sample directory when missing.

    Args:
        project: Project directory name, joined onto the configured
            ``root_dir``.
        peak_call: Text the peaks directory name must contain.
        slurm: If true and ``job_file`` is not given, write
            ``scripts/<name>_peakannotate.sh`` for each sample and submit it
            with ``sbatch``.
        job_file: Path of a script to append the annotation commands to.
            When given, only the first matched peaks directory is handled
            and the function returns right after appending.
    """
    root_dir = conf.get('root_dir', '')
    proj_dir = os.path.join(root_dir, project)
    samples = [str(path) for path in
               glob(os.path.join(proj_dir, "*", "*{}*".format(peak_call)))]
    # Annotation command lines come from the configuration; they are spliced
    # into the template before formatting, so they may use its placeholders.
    TSS_cmd = conf.get('anno_TSS', '')
    NDG_cmd = conf.get('anno_NDG', '')
    # NOTE: no '#SBATCH -o/-e' directives are part of this header, so the job
    # output location is left to sbatch's defaults.
    sbatch_template = (
        '#!/bin/bash -l\n'
        '#SBATCH -A b2012025\n'
        '#SBATCH -J {nm}_peakanno\n'
        '#SBATCH -p core -n 1 \n'
        '#SBATCH -t 3:00:00\n'
        '#SBATCH --mail-type=FAIL\n'
        '#SBATCH --mail-user=\'[email protected]\'\n\n')

    template_peakanno = (
        '\n## Running peak-annotations\n'
        'for bed in $(ls --color=never {peaks_dir}/*narrowPeak);do\n'
        'cut -f1-6 $bed > {peaks_dir}/{nm}_annotate \n'
        '' + TSS_cmd + '\n'
        '' + NDG_cmd + '\n'
        'python ' + col_match.__file__ +
        ' {anno_dir}/{nm}_annotate.tss {anno_dir}/{nm}_annotate.ndg {anno_dir}/{nm}_merged "merge"\n'
        'rm {peaks_dir}/{nm}_annotate\n'
        'done\n')

    for sam in samples:
        sam_dir = os.path.split(sam)[0]
        nm = os.path.basename(sam_dir)
        annotate_dir = os.path.join(sam_dir, "peakannotate")
        if not os.path.exists(annotate_dir):
            os.makedirs(annotate_dir)
        if job_file:
            # Append mode: the commands are added to a script owned by the
            # caller, which is responsible for submitting it.
            with open(job_file, 'a') as jb_fl:
                jb_fl.write(
                    template_peakanno.format(peaks_dir=sam,
                                             nm=nm,
                                             anno_dir=annotate_dir))
            # NOTE(review): returns after the first matched directory, so any
            # further matches are not appended - confirm this is intended.
            return
        if slurm:
            script = os.path.join(sam_dir, "scripts",
                                  "{}_peakannotate.sh".format(nm))
            template_anno = sbatch_template + template_peakanno
            with open(script, 'w') as jb_fl:
                jb_fl.write(
                    template_anno.format(peaks_dir=sam,
                                         nm=nm,
                                         anno_dir=annotate_dir))
            # Submit only after the script has been closed (flushed to disk).
            subprocess.check_call(['sbatch', script])

Example #2

0

Show file

File: postqc.py Project: ashwini06/seqkit

def bamcov(project, genefile, input_file, mode):
    """Write one deepTools post-QC SLURM script per treatment/control BAM pair.

    ``input_file`` is a tab-separated table whose first line is a header and
    whose first two columns name the treatment and control samples. For each
    row, every ``*sorted_rmdup_v1.bam`` of the treatment is paired with every
    such BAM of the control, and a script
    ``<proj_dir>/<treatment>/scripts/<pair>_postqc.sh`` is written. The
    scripts are only written here, not submitted.

    Args:
        project: Project directory name, joined onto the configured
            ``root_dir``.
        genefile: Passed to the template as ``bed_file`` (available to the
            configured computeMatrix command).
        input_file: Path of the tab-separated treatment/control table.
        mode: ``"scale"`` selects the ``computematrix_scale`` configuration
            entry; any other value selects ``computematrix_TSS``.
    """
    root_dir = conf.get('root_dir', '')
    proj_dir = os.path.join(root_dir, project)
    if mode == "scale":
        assign_mode = conf.get('computematrix_scale', '')
    else:
        assign_mode = conf.get('computematrix_TSS', '')
    sbatch_template = ('#!/bin/bash -l\n'
                       '#SBATCH -A b2012025\n'
                       '#SBATCH -J {name}_postqc\n'
                       '#SBATCH -p core -n 3 \n'
                       '#SBATCH -t 4:00:00\n'
                       '#SBATCH -e ' + proj_dir + '/{sample}/scripts/{name}_postqc.stderr\n'
                       '#SBATCH -o ' + proj_dir + '/{sample}/scripts/{name}_postqc.stdout\n'
                       '#SBATCH --mail-type=FAIL\n'
                       '#SBATCH --mail-user=\'[email protected]\'\n\n'
                       'module load bioinfo-tools\n'
                       'module load deepTools/2.2.3\n')

    template = ('bamCompare -b1 {treatment} -b2 {control} --binSize 25 --ratio log2 --scaleFactorsMethod "readCount" -o {postqc_dir}/{treat}_Vs_{ctrl}_log2ratio_readcount.bw --normalizeUsingRPKM\n'
                '' + assign_mode + '\n'
                'plotHeatmap -m {postqc_dir}/matrix.mat.gz -out {postqc_dir}/{treat}_Vs_{ctrl}_heatmap_v2.png --heatmapHeight 25 --heatmapWidth 3 --whatToShow \'heatmap and colorbar\' --sortUsing max\n')

    # Loop-invariant pieces, built once.
    template_pc = sbatch_template + template
    bam_pattern = "{}/{}/alignment_*/bam_files/{}*sorted_rmdup_v1.bam"
    bam_suffix = "_sorted_rmdup_v1.bam"
    bed_file = genefile

    # The context manager closes the table even if a row fails half-way.
    with open(input_file, 'r') as pk_file:
        # Skip the header; the default keeps an empty file from raising.
        next(pk_file, None)
        for ln in pk_file:
            ln = ln.strip()
            if not ln:
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            fields = ln.split('\t')
            treat = fields[0]
            ctrl = fields[1]
            postqc_dir = os.path.join(proj_dir, treat, "deepTools")
            if not os.path.exists(postqc_dir):
                os.mkdir(postqc_dir)
            treat_fl = glob(bam_pattern.format(proj_dir, treat, treat))
            control_fl = glob(bam_pattern.format(proj_dir, ctrl, ctrl))
            for sam in treat_fl:
                suf_s = os.path.basename(sam).replace(bam_suffix, "")
                for con in control_fl:
                    con_c = os.path.basename(con).replace(bam_suffix, "")
                    name = "{}_Vs_{}".format(suf_s, con_c)
                    job_file = os.path.join(proj_dir, treat, "scripts",
                                            "{}_postqc.sh".format(name))
                    with open(job_file, 'w') as jb_fl:
                        jb_fl.write(
                            template_pc.format(sample=treat,
                                               treat=suf_s,
                                               ctrl=con_c,
                                               name=name,
                                               treatment=sam,
                                               control=con,
                                               bed_file=bed_file,
                                               postqc_dir=postqc_dir))

Example #3

0

Show file

File: preqc.py Project: freshsunxwk/seqkit

def run_qc(project):
    """Create and submit one FastQC SLURM job per sample of ``project``.

    Samples and their fastq files come from ``find_samples``. For each
    sample the ``scripts`` and ``fastqc`` directories are created when
    missing, ``scripts/<sample>_fastqc.sh`` is written and then handed to
    ``sbatch``.
    """
    proj_dir = os.path.join(conf.get('root_dir', ''), project)
    script_parts = [
        '#!/bin/bash -l\n',
        '#SBATCH -A b2012025\n',
        '#SBATCH -J {sam}_fastqc\n',
        '#SBATCH -p core -n 2 \n',
        '#SBATCH -t 2:00:00\n',
        '#SBATCH -e {sam_dir}/scripts/{sam}_fastqc.stderr\n',
        '#SBATCH -o {sam_dir}/scripts/{sam}_fastqc.stdout\n',
        '#SBATCH --mail-type=FAIL\n',
        '#SBATCH --mail-user=\'[email protected]\'\n\n',
        'module load bioinfo-tools\n',
        'module load FastQC/0.11.5\n',
        'cd ' + proj_dir + '\n',
        'fastqc -o {fastqc_dir} -f fastq {fq_files}\n',
    ]
    fastqc_sbatch_template = ''.join(script_parts)

    samples = find_samples(proj_dir, file_type="fastq")
    for sam, fq_fls in samples.items():
        sam_dir = os.path.join(proj_dir, sam)
        src_dir = os.path.join(sam_dir, 'scripts')
        fastqc_dir = os.path.join(sam_dir, 'fastqc')
        # Script directory first, then the FastQC output directory.
        for needed_dir in (src_dir, fastqc_dir):
            if not os.path.exists(needed_dir):
                os.mkdir(needed_dir)
        job_file = os.path.join(src_dir, "{}_fastqc.sh".format(sam))
        with open(job_file, 'w') as jb_fl:
            jb_fl.write(
                fastqc_sbatch_template.format(sam=sam,
                                              sam_dir=sam_dir,
                                              fastqc_dir=fastqc_dir,
                                              fq_files=" ".join(fq_fls)))
        subprocess.check_call(['sbatch', job_file])

Example #4

0

Show file

File: motif.py Project: ashwini06/seqkit

def run_denovo(project,peak_call,slurm=False,job_file=None):
    """Write a de-novo motif analysis SLURM script for every peak ``xls`` file.

    Files matching ``<root_dir>/<project>/*/*<peak_call>*/*xls`` are located;
    for each one a ``motif`` directory is created two levels above the file
    (when missing) and ``scripts/<name>_denovo.sh`` is written next to it.
    The scripts are written only, not submitted. The ``slurm`` and
    ``job_file`` arguments are accepted but their values are not used.
    """
    proj_dir = os.path.join(conf.get('root_dir',''), project)
    xls_pattern = os.path.join(proj_dir, "*", "*{}*".format(peak_call), "*xls")
    xls_files = [str(found) for found in glob(xls_pattern)]
    motif_r = os.path.join(os.path.dirname(utils.__file__), "motifanalysis.r")

    header = ''.join([
        '#!/bin/bash -l\n',
        '#SBATCH -A b2012025\n',
        '#SBATCH -J {nm}_motifanalysis\n',
        '#SBATCH -p core -n 1 \n',
        '#SBATCH -t 3:00:00\n',
        '#SBATCH --mail-type=FAIL\n',
        '#SBATCH --mail-user=\'[email protected]\'\n\n',
    ])
    commands = ''.join([
        '\n## Running de-nove motif analysis\n',
        'module load bioinfo-tools\n',
        'module load MEMEsuite/4.11.1\n',
        'Rscript ' + motif_r + ' {ip_fl} {op_dir} {op_fl}\n',
    ])
    script_template = header + commands

    for xls in xls_files:
        # <sample>/<peak dir>/<file>.xls -> sample directory is two levels up.
        sample_root = os.path.dirname(os.path.dirname(xls))
        nm = os.path.basename(xls).replace(".xls", "")
        op_dir = os.path.join(sample_root, "motif")
        if not os.path.exists(op_dir):
            os.mkdir(op_dir)
        op_fl = nm + "_seq.fa"
        script_path = os.path.join(sample_root, "scripts",
                                   "{}_denovo.sh".format(nm))
        with open(script_path, 'w') as jb_fl:
            jb_fl.write(
                script_template.format(ip_fl=xls,
                                       op_dir=op_dir,
                                       op_fl=op_fl,
                                       nm=nm))

Example #5

0

Show file

File: motif.py Project: freshsunxwk/seqkit

def run_denovo(project, peak_call, slurm=False, job_file=None):
    """Prepare de-novo motif analysis job scripts for a project.

    Each ``*xls`` file found under
    ``<root_dir>/<project>/*/*<peak_call>*/`` gets its own
    ``scripts/<name>_denovo.sh`` in the directory two levels above the file,
    and a sibling ``motif`` output directory is created when absent. Nothing
    is submitted here, and the values of ``slurm`` and ``job_file`` are not
    used.
    """
    root_dir = conf.get('root_dir', '')
    proj_dir = os.path.join(root_dir, project)
    peak_tables = list(map(
        str,
        glob(os.path.join(proj_dir, "*", "*{}*".format(peak_call), "*xls"))))
    rscript_path = os.path.join(os.path.dirname(utils.__file__),
                                "motifanalysis.r")

    sbatch_header = '\n'.join((
        '#!/bin/bash -l',
        '#SBATCH -A b2012025',
        '#SBATCH -J {nm}_motifanalysis',
        '#SBATCH -p core -n 1 ',
        '#SBATCH -t 3:00:00',
        '#SBATCH --mail-type=FAIL',
        '#SBATCH --mail-user=\'[email protected]\'',
    )) + '\n\n'
    denovo_body = '\n'.join((
        '',
        '## Running de-nove motif analysis',
        'module load bioinfo-tools',
        'module load MEMEsuite/4.11.1',
        'Rscript ' + rscript_path + ' {ip_fl} {op_dir} {op_fl}',
    )) + '\n'
    full_template = sbatch_header + denovo_body

    for table in peak_tables:
        base_name = os.path.basename(table).replace(".xls", "")
        # The xls lives in <sample>/<peak dir>/, so go up two levels.
        sample_dir = os.path.dirname(os.path.dirname(table))
        motif_dir = os.path.join(sample_dir, "motif")
        if not os.path.exists(motif_dir):
            os.mkdir(motif_dir)
        fasta_name = base_name + "_seq.fa"
        script_file = os.path.join(sample_dir, "scripts",
                                   "{}_denovo.sh".format(base_name))
        with open(script_file, 'w') as handle:
            handle.write(
                full_template.format(ip_fl=table,
                                     op_dir=motif_dir,
                                     op_fl=fasta_name,
                                     nm=base_name))

Example #6

0

Show file

File: htcuff.py Project: freshsunxwk/seqkit

def run_htcuff(project, aligner, sample=None):
    """Write and submit an htseq-count + cufflinks SLURM job per sample.

    For each sample, ``htseq`` and ``cufflinks`` output directories and the
    ``scripts`` directory are created when missing,
    ``scripts/<sample>_htcuff.sh`` is written and then submitted with
    ``sbatch``. The generated script loops over the ``*_sorted.bam`` files in
    ``<sample>/alignment_<aligner>/bam_files``.

    Args:
        project: Project directory name, joined onto the configured
            ``root_dir``.
        aligner: Aligner name used to locate the ``alignment_<aligner>``
            directory.
        sample: Optional single sample to process; by default all samples
            returned by ``find_samples`` are processed.

    Raises:
        SystemExit: If ``sample`` is given but is not a directory of the
            project.
    """
    root_dir = conf.get('root_dir', '')
    proj_dir = os.path.join(root_dir, project)
    # Doubled braces are literal shell ${...} expansions after str.format.
    align_template = (
        '#!/bin/bash -l\n'
        '#SBATCH -A b2012025\n'
        '#SBATCH -J {sam}_htcuff\n'
        '#SBATCH -p core -n 1 \n'
        '#SBATCH -t 10:00:00\n'
        '#SBATCH --mail-type=FAIL\n'
        '#SBATCH --mail-user=\'[email protected]\'\n\n'
        '#SBATCH -e {sam_dir}/scripts/{sam}_htcuff.stderr\n'
        '#SBATCH -o {sam_dir}/scripts/{sam}_htcuff.stdout\n'
        'module load bioinfo-tools\n'
        'module load samtools/1.3\n'
        'module load cufflinks/2.2.1\n'
        'module load htseq/0.6.1\n\n'
        'genome_fl=\"/pica/data/uppnex/igenomes/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf\"\n'
        'for bam in $(ls --color=never {align_dir}/*_sorted.bam);do\n'
        'nm=$(basename ${{bam}})\n'
        'nm=${{nm/.bam/}}\n'
        'htseq-count -s reverse -q -f bam ${{bam}} ${{genome_fl}} > {ht_dir}/${{nm}}_counts.txt\n'
        'cufflinks -p 8 --library-type fr-firststrand -G ${{genome_fl}} -o {cuff_dir}/${{nm}}_cufflinks ${{bam}}\n'
        'done\n\n')

    if sample:
        if os.path.isdir(os.path.join(proj_dir, sample)):
            samples = [sample]
        else:
            raise SystemExit(
                "Given sample {} is not found in project directory {}".format(
                    sample, proj_dir))
    else:
        samples = find_samples(proj_dir)

    for sam in samples:
        sam_dir = os.path.join(proj_dir, sam)
        src_dir = os.path.join(sam_dir, 'scripts')
        align_dir = os.path.join(sam_dir, "alignment_{}".format(aligner),
                                 "bam_files")
        ht_dir = os.path.join(sam_dir, 'htseq')
        cuff_dir = os.path.join(sam_dir, 'cufflinks')
        # The job script and its stdout/stderr files live in src_dir, so it
        # has to exist before the script is written or submitted.
        for needed_dir in (src_dir, ht_dir, cuff_dir):
            if not os.path.exists(needed_dir):
                os.mkdir(needed_dir)
        job_file = os.path.join(src_dir, "{}_{}.sh".format(sam, "htcuff"))
        with open(job_file, 'w') as jb_fl:
            jb_fl.write(
                align_template.format(sam=sam,
                                      sam_dir=sam_dir,
                                      align_dir=align_dir,
                                      ht_dir=ht_dir,
                                      cuff_dir=cuff_dir))
        subprocess.check_call(['sbatch', job_file])

Example #7

0

Show file

def run_b2b(project, aligner, sample=None, slurm=False, job_file=None):
    """Generate the BAM-to-BED conversion commands for a project's samples.

    For each sample the ``alignment_<aligner>/bedfiles`` directory is created
    when missing. The commands are then either appended to ``job_file`` or
    written to a stand-alone SLURM script that is submitted.

    Args:
        project: Project directory name, joined onto the configured
            ``root_dir``.
        aligner: Aligner name used to locate ``alignment_<aligner>``.
        sample: Optional single sample to process; by default all samples
            returned by ``find_samples`` are processed.
        slurm: If true and ``job_file`` is not given, write
            ``scripts/<sample>_<aligner>_bamTobed.sh`` per sample and submit
            it with ``sbatch``.
        job_file: Path of a script to append the commands to. When given,
            only the first sample is handled and the function returns.

    Raises:
        SystemExit: If ``sample`` is given but is not a directory of the
            project.
    """
    root_dir = conf.get('root_dir', '')
    proj_dir = os.path.join(root_dir, project)
    if sample:
        if os.path.isdir(os.path.join(proj_dir, sample)):
            samples = [sample]
        else:
            raise SystemExit(
                "Given sample {} is not found in project directory {}".format(
                    sample, proj_dir))
    else:
        samples = find_samples(proj_dir)

    # Both templates are loop-invariant, so they are built once. Doubled
    # braces are literal shell/awk braces after str.format.
    sbatch_template = (
        '#!/bin/bash -l\n'
        '#SBATCH -A b2012025\n'
        '#SBATCH -J {sam}_bam2bed\n'
        '#SBATCH -p core -n 1 \n'
        '#SBATCH -t 3:00:00\n'
        '#SBATCH --mail-type=FAIL\n'
        '#SBATCH --mail-user=\'[email protected]\'\n\n')
    template_b2b = (
        '## run bam to bed\n'
        'module load BEDTools/2.11.2\n'
        'for bam in $(ls --color=never {sam_dir}/alignment_{aligner}/bam_files/*sorted_rmdup.bam);do\n'
        'bed_fl=${{bam/.bam/.bed}}\n'
        'bed_fl=${{bed_fl/bam_files/bedfiles}}\n'
        'bed_uniq_fl=${{bed_fl/.bed/_uniq.bed}}\n'
        'bamToBed -i ${{bam}} > ${{bed_fl}}\n'
        'awk -F\\\\t -v \'OFS=\\t\' \'{{print $1,$2,$3,".",$5,$6}}\' ${{bed_fl}} | sort -u > ${{bed_uniq_fl}}\n'
        'rm ${{bed_fl}}\n'
        'done\n')
    slurm_template = sbatch_template + template_b2b

    for sam in samples:
        sam_dir = os.path.join(proj_dir, sam)
        bed_dir = os.path.join(sam_dir, "alignment_{}".format(aligner),
                               "bedfiles")
        if not os.path.exists(bed_dir):
            os.mkdir(bed_dir)
        if job_file:
            # Append mode: the caller owns and submits the script.
            with open(job_file, 'a') as jb_fl:
                jb_fl.write(
                    template_b2b.format(sam_dir=sam_dir, aligner=aligner))
            # NOTE(review): returns after the first sample - confirm intended.
            return
        if slurm:
            script = os.path.join(sam_dir, "scripts",
                                  "{}_{}_bamTobed.sh".format(sam, aligner))
            with open(script, 'w') as jb_fl:
                jb_fl.write(
                    slurm_template.format(sam=sam,
                                          sam_dir=sam_dir,
                                          aligner=aligner))
            # Submit only after the file is closed; calling sbatch while the
            # handle is still open can hand it an unflushed, partial script.
            subprocess.check_call(['sbatch', script])

Example #8

0

Show file

File: peakanalysis.py Project: ashwini06/seqkit

def run_peakanno(project,peak_call,slurm=False,job_file=None):
    """Generate peak-annotation shell commands for peak-called regions.

    Every path matching ``<root_dir>/<project>/*/*<peak_call>*`` is used as a
    peaks directory. Its parent directory is taken as the sample directory,
    whose base name becomes the job name. A ``peakannotate`` directory is
    created inside the sample directory when missing.

    Args:
        project: Project directory name, joined onto the configured
            ``root_dir``.
        peak_call: Text the peaks directory name must contain.
        slurm: If true and ``job_file`` is not given, write
            ``scripts/<name>_peakannotate.sh`` for each sample and submit it
            with ``sbatch``.
        job_file: Path of a script to append the annotation commands to.
            When given, only the first matched peaks directory is handled
            and the function returns right after appending.
    """
    root_dir = conf.get('root_dir','')
    proj_dir = os.path.join(root_dir,project)
    samples = [str(path) for path in
               glob(os.path.join(proj_dir, "*", "*{}*".format(peak_call)))]
    # Annotation command lines come from the configuration; they are spliced
    # into the template before formatting, so they may use its placeholders.
    TSS_cmd = conf.get('anno_TSS','')
    NDG_cmd = conf.get('anno_NDG','')
    # NOTE: no '#SBATCH -o/-e' directives are part of this header, so the job
    # output location is left to sbatch's defaults.
    sbatch_template = ('#!/bin/bash -l\n'
                       '#SBATCH -A b2012025\n'
                       '#SBATCH -J {nm}_peakanno\n'
                       '#SBATCH -p core -n 1 \n'
                       '#SBATCH -t 3:00:00\n'
                       '#SBATCH --mail-type=FAIL\n'
                       '#SBATCH --mail-user=\'[email protected]\'\n\n')

    template_peakanno = ('\n## Running peak-annotations\n'
                         'for bed in $(ls --color=never {peaks_dir}/*narrowPeak);do\n'
                         'cut -f1-6 $bed > {peaks_dir}/{nm}_annotate \n'
                         ''+TSS_cmd+'\n'
                         ''+NDG_cmd+'\n'
                         'python '+col_match.__file__+' {anno_dir}/{nm}_annotate.tss {anno_dir}/{nm}_annotate.ndg {anno_dir}/{nm}_merged "merge"\n'
                         'rm {peaks_dir}/{nm}_annotate\n'
                         'done\n')

    for sam in samples:
        sam_dir = os.path.split(sam)[0]
        nm = os.path.basename(sam_dir)
        annotate_dir = os.path.join(sam_dir,"peakannotate")
        if not os.path.exists(annotate_dir):
            os.makedirs(annotate_dir)
        if job_file:
            # Append mode: the commands are added to a script owned by the
            # caller, which is responsible for submitting it.
            with open(job_file,'a') as jb_fl:
                jb_fl.write(template_peakanno.format(peaks_dir=sam,nm=nm,anno_dir=annotate_dir))
            # NOTE(review): returns after the first matched directory, so any
            # further matches are not appended - confirm this is intended.
            return
        if slurm:
            script = os.path.join(sam_dir,"scripts","{}_peakannotate.sh".format(nm))
            template_anno = sbatch_template+template_peakanno
            with open(script,'w') as jb_fl:
                jb_fl.write(template_anno.format(peaks_dir=sam,nm=nm,anno_dir=annotate_dir))
            # Submit only after the script has been closed (flushed to disk).
            subprocess.check_call(['sbatch',script])

Example #9

0

Show file

File: analysis.py Project: senthil10/seqkit

def run_align(project, aligner):
    """Write one alignment SLURM script per sample of ``project``.

    For every sample returned by ``find_samples`` the ``scripts`` directory
    and a ``<sample>/<aligner>`` output directory are created when missing,
    and ``scripts/<sample>_<aligner>.sh`` is written. The scripts are only
    written here, not submitted.

    Args:
        project: Project directory name, joined onto the configured
            ``root_dir``.
        aligner: ``"bwa"`` or ``"bowtie2"``.

    Raises:
        SystemExit: If ``aligner`` is not one of the supported values.
    """
    root_dir = conf.get('root_dir', '')
    proj_dir = os.path.join(root_dir, project)
    # Doubled braces survive str.format as literal shell ${...} expansions.
    if aligner == "bwa":
        align_module = 'module load bwa/0.7.12\n'
        align_index = '/pica/data/uppnex/igenomes/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa'
        # samse must read the same fastq that aln indexed (${run}), and the
        # intermediate .sai is removed from where it was written (align_dir).
        align_block = ('bwa aln {align_index} ${{run}} > {align_dir}/${{nm}}.sai\n'
                       'bwa samse {align_index} {align_dir}/${{nm}}.sai ${{run}} | samtools view -Sb - > {align_dir}/${{nm}}.bam\n'
                       'rm {align_dir}/${{nm}}.sai\n')
    elif aligner == "bowtie2":
        align_module = 'module load bowtie2/2.2.6\n'
        align_index = "/pica/data/uppnex/igenomes/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/genome"
        align_block = ('bowtie2 -t -p 8 -k2 --very-sensitive -x {align_index} -q ${{run}} -S {align_dir}/${{nm}}.sam > {align_dir}/${{nm}}_bowtie2.log\n'
                       'samtools view -bS -o {align_dir}/${{nm}}.bam {align_dir}/${{nm}}.sam\n'
                       'rm {align_dir}/${{nm}}.sam\n')
    else:
        # Fail with a clear message instead of hitting unbound locals below.
        raise SystemExit(
            "Please mention the type of aligner (bwa/bowtie2), got {}".format(
                aligner))

    align_template = ('#!/bin/bash -l\n'
                      '#SBATCH -A b2012025\n'
                      '#SBATCH -J {sam}_align\n'
                      '#SBATCH -p core -n 2 \n'
                      '#SBATCH -t\t10:00:00\n'
                      '#SBATCH --mail-type=FAIL\n'
                      '#SBATCH --mail-user=\'[email protected]\'\n\n'
                      'module load bioinfo-tools\n'
                      'module load samtools/1.3\n'
                      '' + align_module + ''
                      'cd ' + proj_dir + '\n'
                      'if [[ $(ls {sam}/Rawdata/*gz | wc -l) -gt 0 ]]; then gzip -d {sam}/Rawdata/*gz; fi\n'
                      'for run in {fq_files};do\n'
                      'nm=$(basename ${{run}})\n'
                      'nm=${{nm/.fastq/}}\n'
                      '' + align_block + ''
                      'done\n')

    samples = find_samples(proj_dir)
    for sam in samples.keys():
        fq_fls = samples[sam]
        sam_dir = os.path.join(proj_dir, sam)
        src_dir = os.path.join(sam_dir, 'scripts')
        if not os.path.exists(src_dir):
            os.mkdir(src_dir)
        align_dir = os.path.join(sam_dir, aligner)
        if not os.path.exists(align_dir):
            os.mkdir(align_dir)
        job_file = os.path.join(src_dir, "{}_{}.sh".format(sam, aligner))
        with open(job_file, 'w') as jb_fl:
            jb_fl.write(
                align_template.format(sam=sam,
                                      sam_dir=sam_dir,
                                      align_dir=align_dir,
                                      proj_dir=proj_dir,
                                      align_index=align_index,
                                      fq_files=" ".join(fq_fls)))

Example #10

0

Show file

def run_dr(project, input_file):
    """Will run the idr analysis to check for biological replicate consistence.

    Reads replicate pairs (first two tab-separated columns, header line
    skipped) from ``input_file`` and builds an sbatch script template for an
    IDR run under ``<root_dir>/<project>``.

    NOTE(review): this function looks unfinished. The loop body uses names
    that are not defined in this function (``treat``, ``peak_call``,
    ``mode``, ``template``, ``treat_fl``, ``control_fl``) and the template
    uses fields the ``format`` call never supplies; see the notes below.
    """
    root_dir = conf.get('root_dir', '')
    proj_dir = os.path.join(root_dir, project)
    # NOTE(review): the template expects the fields rep1, rep2, rep1_dir,
    # rep2_dir, 'mm10_blacklisted-regions' and 'mm10', none of which are
    # passed to .format() further down. The {mm10_blacklisted-regions.bed}
    # and {mm10.genome} fields look like placeholders for file paths that
    # were never filled in - confirm with the author.
    sbatch_template = (
        '#!/bin/bash -l\n'
        '#SBATCH -A b2012025\n'
        '#SBATCH -J {name}_peakcall\n'
        '#SBATCH -p core -n 1 \n'
        '#SBATCH -t 5:00:00\n'
        '#SBATCH -o ' + proj_dir +
        '/{rep1}_Vs_{rep2}/scripts/{name}_idr.stdout\n'
        '#SBATCH -e ' + proj_dir +
        '/{rep1}_Vs_{rep2}/scripts/{name}_idr.stderr\n'
        '#SBATCH --mail-type=FAIL\n'
        '#SBATCH --mail-user=\'[email protected]\'\n\n'
        'module load bioinfo-tools\n'
        'sort -k 8,8nr {rep1_dir}/*.narrowPeak > {rep1_dir}/tmp.regionPeak\n'
        'intersectBed -a {rep1_dir}/tmp.regionPeak -b {mm10_blacklisted-regions.bed} > {rep1_dir}/cleanedpeaks.regionPeak\n'
        'sort -k 8,8nr {rep2_dir}/*.narrowPeak > {rep2_dir}/tmp.regionPeak\n'
        'intersectBed -a {rep2_dir}/tmp.regionPeak -b {mm10_blacklisted-regions.bed} > {rep2_dir}/cleanedpeaks.regionPeak\n'
        'Rscript batch-consistency-analysis.r rep1,rep2 -1 idr_op 0 F p.value {mm10.genome}\n'
    )

    # NOTE(review): the file handle is never closed, and file.next() exists
    # only on Python 2 file objects.
    pk_file = open(input_file, 'r')
    # Skip the header line.
    pk_file.next()
    for ln in iter(pk_file):
        ln = ln.strip()
        ln = ln.split('\t')
        rep1 = ln[0]
        rep2 = ln[1]
        name = "{}_Vs_{}".format(rep1, rep2)
        # All macs2_* matches are concatenated into a single string; only
        # rep1 is looked up here, there is no matching rep2_dir.
        rep1_dir = ''.join(glob("{}/{}/macs2_*".format(proj_dir, rep1)))
        # NOTE(review): treat, peak_call and mode are not defined in this
        # function; unless they exist as module globals this is a NameError.
        peaks_dir = os.path.join(proj_dir, treat,
                                 "{}_{}".format(peak_call, mode))
        if not os.path.exists(peaks_dir):
            os.makedirs(peaks_dir)
        job_fl = os.path.join(proj_dir, treat, "scripts",
                              "{}_peakcall.sh".format(name))
        # NOTE(review): template, treat_fl and control_fl are likewise not
        # defined in this function.
        template_pc = sbatch_template + template
        with open(job_fl, 'w') as jb_fl:
            jb_fl.write(
                template_pc.format(name=name,
                                   treat=treat,
                                   treatment=treat_fl,
                                   control=control_fl,
                                   peaks_dir=peaks_dir))
    # NOTE(review): runs once after the loop, so only the last script written
    # is submitted (and job_fl is unbound if the file has no data rows).
    subprocess.check_call(['sbatch', job_fl])

Example #11

0

Show file

File: analysis.py Project: ashwini06/seqkit

def run_b2b(project, aligner, sample=None, slurm=False, job_file=None):
    """Generate the BAM-to-BED conversion commands for a project's samples.

    For each sample the ``alignment_<aligner>/bedfiles`` directory is created
    when missing. The commands are then either appended to ``job_file`` or
    written to a stand-alone SLURM script that is submitted.

    Args:
        project: Project directory name, joined onto the configured
            ``root_dir``.
        aligner: Aligner name used to locate ``alignment_<aligner>``.
        sample: Optional single sample to process; by default all samples
            returned by ``find_samples`` are processed.
        slurm: If true and ``job_file`` is not given, write
            ``scripts/<sample>_<aligner>_bamTobed.sh`` per sample and submit
            it with ``sbatch``.
        job_file: Path of a script to append the commands to. When given,
            only the first sample is handled and the function returns.

    Raises:
        SystemExit: If ``sample`` is given but is not a directory of the
            project.
    """
    root_dir = conf.get('root_dir','')
    proj_dir = os.path.join(root_dir, project)
    if sample:
        if os.path.isdir(os.path.join(proj_dir, sample)):
            samples = [sample]
        else:
            raise SystemExit("Given sample {} is not found in project directory {}".format(sample, proj_dir))
    else:
        samples = find_samples(proj_dir)

    # Both templates are loop-invariant, so they are built once. Doubled
    # braces are literal shell/awk braces after str.format.
    sbatch_template = ('#!/bin/bash -l\n'
                       '#SBATCH -A b2012025\n'
                       '#SBATCH -J {sam}_bam2bed\n'
                       '#SBATCH -p core -n 1 \n'
                       '#SBATCH -t 3:00:00\n'
                       '#SBATCH --mail-type=FAIL\n'
                       '#SBATCH --mail-user=\'[email protected]\'\n\n')
    template_b2b = ('## run bam to bed\n'
                    'module load BEDTools/2.11.2\n'
                    'for bam in $(ls --color=never {sam_dir}/alignment_{aligner}/bam_files/*sorted_rmdup.bam);do\n'
                    'bed_fl=${{bam/.bam/.bed}}\n'
                    'bed_fl=${{bed_fl/bam_files/bedfiles}}\n'
                    'bed_uniq_fl=${{bed_fl/.bed/_uniq.bed}}\n'
                    'bamToBed -i ${{bam}} > ${{bed_fl}}\n'
                    'awk -F\\\\t -v \'OFS=\\t\' \'{{print $1,$2,$3,".",$5,$6}}\' ${{bed_fl}} | sort -u > ${{bed_uniq_fl}}\n'
                    'rm ${{bed_fl}}\n'
                    'done\n')
    slurm_template = sbatch_template + template_b2b

    for sam in samples:
        sam_dir = os.path.join(proj_dir, sam)
        bed_dir = os.path.join(sam_dir, "alignment_{}".format(aligner), "bedfiles")
        if not os.path.exists(bed_dir):
            os.mkdir(bed_dir)
        if job_file:
            # Append mode: the caller owns and submits the script.
            with open(job_file, 'a') as jb_fl:
                jb_fl.write(template_b2b.format(sam_dir=sam_dir, aligner=aligner))
            # NOTE(review): returns after the first sample - confirm intended.
            return
        if slurm:
            script = os.path.join(sam_dir, "scripts", "{}_{}_bamTobed.sh".format(sam, aligner))
            with open(script, 'w') as jb_fl:
                jb_fl.write(slurm_template.format(sam=sam, sam_dir=sam_dir, aligner=aligner))
            # Submit only after the file is closed; calling sbatch while the
            # handle is still open can hand it an unflushed, partial script.
            subprocess.check_call(['sbatch', script])

Example #12

0

Show file

File: peakanalysis.py Project: ashwini06/seqkit

def run_peakcall(project, input_file, mode, peak_call,peakannotate):
    """Write and submit one peak-calling SLURM job per treatment/control pair.

    ``input_file`` is a tab-separated table whose first line is a header and
    whose first two columns name the treatment and control samples. For each
    row, every ``*rmdup_uniq.bed`` file of the treatment is paired with every
    such file of the control; a script
    ``<proj_dir>/<treatment>/scripts/<pair>_peakcall.sh`` is written and
    submitted with ``sbatch``.

    Args:
        project: Project directory name, joined onto the configured
            ``root_dir``.
        input_file: Path of the tab-separated treatment/control table.
        mode: ``"TF"`` or ``"HM"``; selects which configured command is used.
        peak_call: ``"macs2"`` (both modes) or ``"danpos2"`` (``"HM"`` only).
        peakannotate: If true, ``run_peakanno`` appends its commands to each
            job script before it is submitted.

    Raises:
        SystemExit: On an unsupported ``mode`` / ``peak_call`` combination.
    """
    root_dir = conf.get('root_dir','')
    proj_dir = os.path.join(root_dir,project)
    load_module = 'module load MACS/2.1.0\n'
    sbatch_template = ('#!/bin/bash -l\n'
                       '#SBATCH -A b2012025\n'
                       '#SBATCH -J {name}_peakcall\n'
                       '#SBATCH -p core -n 1 \n'
                       '#SBATCH -t 5:00:00\n'
                       '#SBATCH -o '+proj_dir+'/{treat}/scripts/{name}_peakcall.stdout\n'
                       '#SBATCH -e '+proj_dir+'/{treat}/scripts/{name}_peakcall.stderr\n'
                       '#SBATCH --mail-type=FAIL\n'
                       '#SBATCH --mail-user=\'[email protected]\'\n\n'
                       'module load bioinfo-tools\n')
    # The configured command strings are spliced in before formatting, so
    # they may use the placeholders supplied to format() below.
    if mode == "TF":
        if peak_call == "macs2":
            macs2_cmd = conf.get('macs2_TF','')
            template = ('## Running Peak-calling for TF-ChIP data\n'
                        ''+load_module+''
                        ''+macs2_cmd+'')
        else:
            raise SystemExit("Please mention the type of peak caller - macs2")
    elif mode == "HM":
        if peak_call == "macs2":
            macs2_cmd = conf.get('macs2_HM','')
            template = ('# Running macs2 peak-calling for HM data\n'
                        ''+load_module+''
                        ''+macs2_cmd+'')
        elif peak_call == "danpos2":
            danpos_cmd = conf.get('danpos2_dpeak','')
            template = ('# Running danpos2 peakcalling for HM data\n'
                        ''+danpos_cmd+'')
        else:
            raise SystemExit("Please mention the type of peak_Caller (macs2/danpos2)")
    else:
        raise SystemExit("Please mention the type of mode - either TF or HM")

    # Loop-invariant pieces, built once.
    template_pc = sbatch_template + template
    bed_pattern = "{}/{}/alignment_*/bedfiles/{}*rmdup_uniq.bed"
    bed_suffix = "_sorted_rmdup_uniq.bed"

    # The context manager closes the table even if a job submission fails.
    with open(input_file,'r') as pk_file:
        # Skip the header; the default keeps an empty file from raising.
        next(pk_file, None)
        for ln in pk_file:
            ln = ln.strip()
            if not ln:
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            fields = ln.split('\t')
            treat = fields[0]
            ctrl = fields[1]
            treat_fl = glob(bed_pattern.format(proj_dir,treat,treat))
            control_fl = glob(bed_pattern.format(proj_dir,ctrl,ctrl))
            peaks_dir = os.path.join(proj_dir,treat,"{}_{}".format(peak_call,mode))
            if not os.path.exists(peaks_dir):
                os.makedirs(peaks_dir)
            for sam in treat_fl:
                suf_s = os.path.basename(sam).replace(bed_suffix,"")
                for con in control_fl:
                    con_c = os.path.basename(con).replace(bed_suffix,"")
                    name = "{}_Vs_{}".format(suf_s,con_c)
                    job_fl = os.path.join(proj_dir,treat,"scripts","{}_peakcall.sh".format(name))
                    with open(job_fl,'w') as jb_fl:
                        jb_fl.write(template_pc.format(name=name,treat=treat, treatment=sam, control=con,peaks_dir=peaks_dir))
                    if peakannotate:
                        # Appends to the script just written; must happen
                        # before the script is submitted.
                        run_peakanno(project=project,peak_call=peak_call,slurm=True,job_file=job_fl)
                    subprocess.check_call(['sbatch',job_fl])
Example #13

0

Show file

def run_peakcall(project, input_file, mode, peak_call, peakannotate):
    """Write and submit one peak-calling SLURM job per treatment/control pair.

    ``input_file`` is a tab-separated table whose first line is a header and
    whose first two columns name the treatment and control samples. For each
    row, every ``*rmdup_uniq.bed`` file of the treatment is paired with every
    such file of the control; a script
    ``<proj_dir>/<treatment>/scripts/<pair>_peakcall.sh`` is written and
    submitted with ``sbatch``.

    Args:
        project: Project directory name, joined onto the configured
            ``root_dir``.
        input_file: Path of the tab-separated treatment/control table.
        mode: ``"TF"`` or ``"HM"``; selects which configured command is used.
        peak_call: ``"macs2"`` (both modes) or ``"danpos2"`` (``"HM"`` only).
        peakannotate: If true, ``run_peakanno`` appends its commands to each
            job script before it is submitted.

    Raises:
        SystemExit: On an unsupported ``mode`` / ``peak_call`` combination.
    """
    root_dir = conf.get('root_dir', '')
    proj_dir = os.path.join(root_dir, project)
    load_module = 'module load MACS/2.1.0\n'
    sbatch_template = (
        '#!/bin/bash -l\n'
        '#SBATCH -A b2012025\n'
        '#SBATCH -J {name}_peakcall\n'
        '#SBATCH -p core -n 1 \n'
        '#SBATCH -t 5:00:00\n'
        '#SBATCH -o ' + proj_dir + '/{treat}/scripts/{name}_peakcall.stdout\n'
        '#SBATCH -e ' + proj_dir + '/{treat}/scripts/{name}_peakcall.stderr\n'
        '#SBATCH --mail-type=FAIL\n'
        '#SBATCH --mail-user=\'[email protected]\'\n\n'
        'module load bioinfo-tools\n')
    # The configured command strings are spliced in before formatting, so
    # they may use the placeholders supplied to format() below.
    if mode == "TF":
        if peak_call == "macs2":
            macs2_cmd = conf.get('macs2_TF', '')
            template = ('## Running Peak-calling for TF-ChIP data\n'
                        '' + load_module + ''
                        '' + macs2_cmd + '')
        else:
            raise SystemExit("Please mention the type of peak caller - macs2")
    elif mode == "HM":
        if peak_call == "macs2":
            macs2_cmd = conf.get('macs2_HM', '')
            template = ('# Running macs2 peak-calling for HM data\n'
                        '' + load_module + ''
                        '' + macs2_cmd + '')
        elif peak_call == "danpos2":
            danpos_cmd = conf.get('danpos2_dpeak', '')
            template = ('# Running danpos2 peakcalling for HM data\n'
                        '' + danpos_cmd + '')
        else:
            raise SystemExit(
                "Please mention the type of peak_Caller (macs2/danpos2)")
    else:
        raise SystemExit("Please mention the type of mode - either TF or HM")

    # Loop-invariant pieces, built once.
    template_pc = sbatch_template + template
    bed_pattern = "{}/{}/alignment_*/bedfiles/{}*rmdup_uniq.bed"
    bed_suffix = "_sorted_rmdup_uniq.bed"

    # The context manager closes the table even if a job submission fails.
    with open(input_file, 'r') as pk_file:
        # Skip the header; the default keeps an empty file from raising.
        next(pk_file, None)
        for ln in pk_file:
            ln = ln.strip()
            if not ln:
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            fields = ln.split('\t')
            treat = fields[0]
            ctrl = fields[1]
            treat_fl = glob(bed_pattern.format(proj_dir, treat, treat))
            control_fl = glob(bed_pattern.format(proj_dir, ctrl, ctrl))
            peaks_dir = os.path.join(proj_dir, treat,
                                     "{}_{}".format(peak_call, mode))
            if not os.path.exists(peaks_dir):
                os.makedirs(peaks_dir)
            for sam in treat_fl:
                suf_s = os.path.basename(sam).replace(bed_suffix, "")
                for con in control_fl:
                    con_c = os.path.basename(con).replace(bed_suffix, "")
                    name = "{}_Vs_{}".format(suf_s, con_c)
                    job_fl = os.path.join(proj_dir, treat, "scripts",
                                          "{}_peakcall.sh".format(name))
                    with open(job_fl, 'w') as jb_fl:
                        jb_fl.write(
                            template_pc.format(name=name,
                                               treat=treat,
                                               treatment=sam,
                                               control=con,
                                               peaks_dir=peaks_dir))
                    if peakannotate:
                        # Appends to the script just written; must happen
                        # before the script is submitted.
                        run_peakanno(project=project,
                                     peak_call=peak_call,
                                     slurm=True,
                                     job_file=job_fl)
                    subprocess.check_call(['sbatch', job_fl])

Example #14

0

Show file

File: postqc.py Project: freshsunxwk/seqkit

def bamcov(project, genefile, input_file, mode):
    """Write a deepTools post-QC sbatch script for every treatment/control BAM pair.

    The scripts are only written to ``<proj_dir>/<treat>/scripts``; this
    function does not submit them.

    Args:
        project: Project directory name, joined onto ``conf['root_dir']``.
        genefile: Passed to the template as ``{bed_file}``; presumably consumed
            by the computeMatrix command taken from the configuration
            (confirm against the config file).
        input_file: Tab-separated file with a header line; column 1 is the
            treatment sample and column 2 the control sample.
        mode: ``"scale"`` selects ``conf['computematrix_scale']``; any other
            value selects ``conf['computematrix_TSS']``.
    """
    root_dir = conf.get('root_dir', '')
    proj_dir = os.path.join(root_dir, project)
    if mode == "scale":
        assign_mode = conf.get('computematrix_scale', '')
    else:
        assign_mode = conf.get('computematrix_TSS', '')
    sbatch_template = (
        '#!/bin/bash -l\n'
        '#SBATCH -A b2012025\n'
        '#SBATCH -J {name}_postqc\n'
        '#SBATCH -p core -n 3 \n'
        '#SBATCH -t 4:00:00\n'
        '#SBATCH -e ' + proj_dir + '/{sample}/scripts/{name}_postqc.stderr\n'
        '#SBATCH -o ' + proj_dir + '/{sample}/scripts/{name}_postqc.stdout\n'
        '#SBATCH --mail-type=FAIL\n'
        '#SBATCH --mail-user=\'[email protected]\'\n\n'
        'module load bioinfo-tools\n'
        'module load deepTools/2.2.3\n'
    )

    # The configured computeMatrix command is spliced in before formatting, so
    # any placeholders it contains are filled by the same .format() call below.
    template = (
        'bamCompare -b1 {treatment} -b2 {control} --binSize 25 --ratio log2 --scaleFactorsMethod "readCount" -o {postqc_dir}/{treat}_Vs_{ctrl}_log2ratio_readcount.bw --normalizeUsingRPKM\n'
        '' + assign_mode + '\n'
        'plotHeatmap -m {postqc_dir}/matrix.mat.gz -out {postqc_dir}/{treat}_Vs_{ctrl}_heatmap_v2.png --heatmapHeight 25 --heatmapWidth 3 --whatToShow \'heatmap and colorbar\' --sortUsing max\n'
    )
    # Loop-invariant: build the full script template once.
    template_pc = sbatch_template + template

    bed_file = genefile
    bam_suffix = "_sorted_rmdup_v1.bam"
    bam_pattern = "{}/{}/alignment_*/bam_files/{}*sorted_rmdup_v1.bam"

    # The context manager guarantees the handle is closed even if a write fails.
    with open(input_file, 'r') as pk_file:
        # Skip the header; next() with a default works on Python 2 and 3 and
        # tolerates an empty file.
        next(pk_file, None)
        for ln in pk_file:
            fields = ln.strip().split('\t')
            if len(fields) < 2:
                # Blank or malformed row: nothing to pair up.
                continue
            treat, ctrl = fields[0], fields[1]
            postqc_dir = os.path.join(proj_dir, treat, "deepTools")
            if not os.path.exists(postqc_dir):
                os.mkdir(postqc_dir)
            treat_fl = glob(bam_pattern.format(proj_dir, treat, treat))
            control_fl = glob(bam_pattern.format(proj_dir, ctrl, ctrl))
            # One script per (treatment BAM, control BAM) combination.
            for sam in treat_fl:
                suf_s = os.path.basename(sam).replace(bam_suffix, "")
                for con in control_fl:
                    con_c = os.path.basename(con).replace(bam_suffix, "")
                    name = "{}_Vs_{}".format(suf_s, con_c)
                    job_file = os.path.join(
                        proj_dir, treat,
                        "{}/{}_{}.sh".format("scripts", name, "postqc"))
                    with open(job_file, 'w') as jb_fl:
                        jb_fl.write(
                            template_pc.format(sample=treat,
                                               treat=suf_s,
                                               ctrl=con_c,
                                               name=name,
                                               treatment=sam,
                                               control=con,
                                               bed_file=bed_file,
                                               postqc_dir=postqc_dir))

Example #15

0

Show file

File: analysis.py Project: ashwini06/seqkit

def run_align(project, aligner, genome, sample, bam_to_bed):
  """Write and submit one alignment sbatch job per sample.

  Args:
    project: Project directory name, joined onto ``conf['root_dir']``.
    aligner: One of "bwa", "bowtie2", "bowtie", "STAR" or "tophat".
    genome: Key into ``conf['genome_index']`` used to look up the index.
    sample: Name of a single sample directory inside the project; when
      falsy, every sample returned by ``find_samples(proj_dir)`` is used.
    bam_to_bed: When truthy, ``run_b2b`` is called with the job file before
      submission (presumably it appends BAM-to-BED steps; confirm against
      run_b2b).

  Raises:
    SystemExit: If the aligner is not supported or the given sample
      directory does not exist.
  """
  root_dir = conf.get('root_dir','')
  proj_dir = os.path.join(root_dir, project)
  align_gtf = ''  # only tophat needs an annotation file
  if aligner == "bwa":
    align_module = 'module load bwa/0.7.12\n'
    # The .sai file is written into align_dir, so it must be removed from there.
    align_block = ('bwa aln {align_index} ${{fq}} > {align_dir}/${{nam}}.sai\n'
                   'bwa samse {align_index} {align_dir}/${{nam}}.sai ${{fq}} | samtools view -Sb - > {align_dir}/${{nam}}.bam\n'
                   'rm {align_dir}/${{nam}}.sai\n')
  elif aligner == "bowtie2":
    align_module = 'module load bowtie2/2.2.6\n'
    align_block = ('bowtie2 -t -p 8 -k2 --very-sensitive -x {align_index} -q ${{fq}} -S {align_dir}/${{nam}}.sam 2> {align_dir}/${{nam}}_bowtie2.log\n\n'
                   'samtools view -bS -o {align_dir}/${{nam}}.bam {align_dir}/${{nam}}.sam\n\n'
                   'rm {align_dir}/${{nam}}.sam\n\n')
  elif aligner == "bowtie":
    align_module = 'module load bowtie/1.1.2\n'
    align_block = ('bowtie -q -m 1 -v 3 --best --strata {align_index} ${{fq}} -S {align_dir}/${{nam}}.sam 2>{align_dir}/${{nam}}_bowtie.log\n\n'
                   'samtools view -bS -o {align_dir}/${{nam}}.bam {align_dir}/${{nam}}.sam\n\n')
  elif aligner == "STAR":
    align_module = 'module load star/2.3.1o\n'
    # NOTE(review): the shared steps below expect {align_dir}/${nam}.bam, but
    # this command does not obviously produce that file - confirm STAR output.
    align_block = ("STAR --genomeDir {align_index} --readFilesIn ${{fq}} --outFilterIntronMotifs RemoveNoncanonical --outFileNamePrefix {align_dir}/${{nam}} --outSAMmode Full --runThreadN 8 --outFilterType BySJout --alignSJDBoverhangMin 1 --outFilterMismatchNmax 5\n\n")
  elif aligner == "tophat":
    align_module = ('module load tophat/2.0.12\n'
                    'module load bowtie2/2.2.6\n')
    align_gtf = "/pica/data/uppnex/igenomes/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf"
    # {align_gtf} is filled in by Python; a shell ${align_gtf} would be empty
    # because the generated script never defines that variable.
    align_block = ("tophat -o {align_dir}/${{nam}} -G {align_gtf} -p 8 --library-type fr-firststrand --solexa1.3-quals {align_index} ${{fq}} \n\n"
                   "mv {align_dir}/${{nam}}/accepted_hits.bam {align_dir}/${{nam}}.bam\n")
  else:
    raise SystemExit("Unknown aligner {} - use one of bwa, bowtie2, bowtie, STAR or tophat".format(aligner))

  align_index = conf['genome_index'][genome][aligner]

  # Built for every aligner (it used to be defined only in the tophat branch).
  align_template = ('#!/bin/bash -l\n'
                    '#SBATCH -A b2012025\n'
                    '#SBATCH -J {sam}_align\n'
                    '#SBATCH -p core -n 4 \n'
                    '#SBATCH -t 10:00:00\n'
                    '#SBATCH --mail-type=FAIL\n'
                    '#SBATCH --mail-user=\'[email protected]\'\n\n'
                    '#SBATCH -e {sam_dir}/scripts/{sam}_align.stderr\n'
                    '#SBATCH -o {sam_dir}/scripts/{sam}_align.stdout\n'
                    'module load bioinfo-tools\n'
                    + align_module +
                    'module load samtools/0.1.19\n'
                    'if [[ $(ls --color=never {sam_dir}/Rawdata/*.gz | wc -l) -gt 0 ]]; then gzip -d {sam_dir}/Rawdata/*.gz; fi\n'
                    'if [[ $(ls --color=never {sam_dir}/Rawdata/*zip | wc -l) -gt 0 ]]; then unzip {sam_dir}/Rawdata/*zip; fi\n'
                    'for fq in $(ls --color=never {sam_dir}/Rawdata/*.fastq);do\n'
                    'nm=$(basename ${{fq}})\n'
                    'nm=${{nm/_*/}}\n'
                    'nam="{sam}_"${{nm}}\n\n'
                    + align_block +
                    #'samtools view -H {align_dir}/${{nam}}.bam | sed -e \'s/SN:\([0-9XY]\)/SN:chr\\1/\' -e \'s/SN:M/SN:chrM/\' | samtools reheader - {align_dir}/${{nam}}.bam > {align_dir}/${{nam}}_v1.bam\n\n'
                    #'mv {align_dir}/${{nam}}_v1.bam {align_dir}/${{nam}}.bam\n\n'
                    'samtools sort {align_dir}/${{nam}}.bam {align_dir}/${{nam}}_sorted\n\n'
                    'java -jar /pica/sw/apps/bioinfo/picard/1.92/milou/MarkDuplicates.jar INPUT={align_dir}/${{nam}}_sorted.bam OUTPUT={align_dir}/${{nam}}_sorted_rmdup.bam METRICS_FILE={align_dir}/${{nam}}_picardmetrics.txt REMOVE_DUPLICATES=True\n\n'
                    'samtools index {align_dir}/${{nam}}_sorted_rmdup.bam\n\n'
                    'samtools index {align_dir}/${{nam}}_sorted.bam\n\n'
                    '[ -e {align_dir}/${{nam}}_sorted.bam ] && rm {align_dir}/${{nam}}.bam\n\n'
                    '[ -e {align_dir}/${{nam}}.sam ] && rm  {align_dir}/${{nam}}.sam\n\n'
                    'done\n')

  if sample:
    if os.path.isdir(os.path.join(proj_dir, sample)):
      samples = [sample]
    else:
      raise SystemExit("Given sample {} is not found in project directory {}".format(sample, proj_dir))
  else:
    samples = find_samples(proj_dir)

  for sam in samples:
    sam_dir = os.path.join(proj_dir, sam)
    src_dir = os.path.join(sam_dir, 'scripts')
    # Create the directories only when missing, but always write and submit
    # the job; previously an existing directory silently skipped the sample.
    if not os.path.exists(src_dir):
      os.mkdir(src_dir)
    align_dir = os.path.join(sam_dir,"alignment_{}".format(aligner),"bam_files")
    if not os.path.exists(align_dir):
      os.makedirs(align_dir)
    job_file = os.path.join(src_dir, "{}_{}.sh".format(sam,aligner))
    with open(job_file, 'w') as jb_fl:
      jb_fl.write(align_template.format(sam=sam, sam_dir=sam_dir, align_dir=align_dir,
                                        align_index=align_index, align_gtf=align_gtf))
    if bam_to_bed:
      run_b2b(project=project, aligner=aligner, slurm=True, sample=sam, job_file=job_file)
    subprocess.check_call(['sbatch',job_file])

Example #16

0

Show file

def run_align(project, aligner, genome, sample, bam_to_bed):
    """Write and submit one alignment sbatch job per sample.

    Args:
        project: Project directory name, joined onto ``conf['root_dir']``.
        aligner: One of "bwa", "bowtie2", "bowtie", "STAR" or "tophat".
        genome: Key into ``conf['genome_index']`` used to look up the index.
        sample: Name of a single sample directory inside the project; when
            falsy, every sample returned by ``find_samples(proj_dir)`` is used.
        bam_to_bed: When truthy, ``run_b2b`` is called with the job file
            before submission (presumably it appends BAM-to-BED steps;
            confirm against run_b2b).

    Raises:
        SystemExit: If the aligner is not supported or the given sample
            directory does not exist.
    """
    root_dir = conf.get('root_dir', '')
    proj_dir = os.path.join(root_dir, project)
    align_gtf = ''  # only tophat needs an annotation file
    if aligner == "bwa":
        align_module = 'module load bwa/0.7.12\n'
        # The .sai file is written into align_dir, so remove it from there.
        align_block = (
            'bwa aln {align_index} ${{fq}} > {align_dir}/${{nam}}.sai\n'
            'bwa samse {align_index} {align_dir}/${{nam}}.sai ${{fq}} | samtools view -Sb - > {align_dir}/${{nam}}.bam\n'
            'rm {align_dir}/${{nam}}.sai\n')
    elif aligner == "bowtie2":
        align_module = 'module load bowtie2/2.2.6\n'
        align_block = (
            'bowtie2 -t -p 8 -k2 --very-sensitive -x {align_index} -q ${{fq}} -S {align_dir}/${{nam}}.sam 2> {align_dir}/${{nam}}_bowtie2.log\n\n'
            'samtools view -bS -o {align_dir}/${{nam}}.bam {align_dir}/${{nam}}.sam\n\n'
            'rm {align_dir}/${{nam}}.sam\n\n')
    elif aligner == "bowtie":
        align_module = 'module load bowtie/1.1.2\n'
        align_block = (
            'bowtie -q -m 1 -v 3 --best --strata {align_index} ${{fq}} -S {align_dir}/${{nam}}.sam 2>{align_dir}/${{nam}}_bowtie.log\n\n'
            'samtools view -bS -o {align_dir}/${{nam}}.bam {align_dir}/${{nam}}.sam\n\n'
        )
    elif aligner == "STAR":
        align_module = 'module load star/2.3.1o\n'
        # NOTE(review): the shared steps below expect {align_dir}/${nam}.bam,
        # but this command does not obviously produce that file - confirm
        # STAR's output naming.
        align_block = (
            "STAR --genomeDir {align_index} --readFilesIn ${{fq}} --outFilterIntronMotifs RemoveNoncanonical --outFileNamePrefix {align_dir}/${{nam}} --outSAMmode Full --runThreadN 8 --outFilterType BySJout --alignSJDBoverhangMin 1 --outFilterMismatchNmax 5\n\n"
        )
    elif aligner == "tophat":
        align_module = ('module load tophat/2.0.12\n'
                        'module load bowtie2/2.2.6\n')
        align_gtf = "/pica/data/uppnex/igenomes/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf"
        # {align_gtf} is filled in by Python; a shell ${align_gtf} would be
        # empty because the generated script never defines that variable.
        align_block = (
            "tophat -o {align_dir}/${{nam}} -G {align_gtf} -p 8 --library-type fr-firststrand --solexa1.3-quals {align_index} ${{fq}} \n\n"
            "mv {align_dir}/${{nam}}/accepted_hits.bam {align_dir}/${{nam}}.bam\n"
        )
    else:
        raise SystemExit(
            "Unknown aligner {} - use one of bwa, bowtie2, bowtie, STAR or tophat"
            .format(aligner))

    align_index = conf['genome_index'][genome][aligner]

    # Built for every aligner (it used to be defined only in the tophat branch).
    align_template = (
        '#!/bin/bash -l\n'
        '#SBATCH -A b2012025\n'
        '#SBATCH -J {sam}_align\n'
        '#SBATCH -p core -n 4 \n'
        '#SBATCH -t 10:00:00\n'
        '#SBATCH --mail-type=FAIL\n'
        '#SBATCH --mail-user=\'[email protected]\'\n\n'
        '#SBATCH -e {sam_dir}/scripts/{sam}_align.stderr\n'
        '#SBATCH -o {sam_dir}/scripts/{sam}_align.stdout\n'
        'module load bioinfo-tools\n'
        + align_module +
        'module load samtools/0.1.19\n'
        'if [[ $(ls --color=never {sam_dir}/Rawdata/*.gz | wc -l) -gt 0 ]]; then gzip -d {sam_dir}/Rawdata/*.gz; fi\n'
        'if [[ $(ls --color=never {sam_dir}/Rawdata/*zip | wc -l) -gt 0 ]]; then unzip {sam_dir}/Rawdata/*zip; fi\n'
        'for fq in $(ls --color=never {sam_dir}/Rawdata/*.fastq);do\n'
        'nm=$(basename ${{fq}})\n'
        'nm=${{nm/_*/}}\n'
        'nam="{sam}_"${{nm}}\n\n'
        + align_block +
        #'samtools view -H {align_dir}/${{nam}}.bam | sed -e \'s/SN:\([0-9XY]\)/SN:chr\\1/\' -e \'s/SN:M/SN:chrM/\' | samtools reheader - {align_dir}/${{nam}}.bam > {align_dir}/${{nam}}_v1.bam\n\n'
        #'mv {align_dir}/${{nam}}_v1.bam {align_dir}/${{nam}}.bam\n\n'
        'samtools sort {align_dir}/${{nam}}.bam {align_dir}/${{nam}}_sorted\n\n'
        'java -jar /pica/sw/apps/bioinfo/picard/1.92/milou/MarkDuplicates.jar INPUT={align_dir}/${{nam}}_sorted.bam OUTPUT={align_dir}/${{nam}}_sorted_rmdup.bam METRICS_FILE={align_dir}/${{nam}}_picardmetrics.txt REMOVE_DUPLICATES=True\n\n'
        'samtools index {align_dir}/${{nam}}_sorted_rmdup.bam\n\n'
        'samtools index {align_dir}/${{nam}}_sorted.bam\n\n'
        '[ -e {align_dir}/${{nam}}_sorted.bam ] && rm {align_dir}/${{nam}}.bam\n\n'
        '[ -e {align_dir}/${{nam}}.sam ] && rm  {align_dir}/${{nam}}.sam\n\n'
        'done\n')

    if sample:
        if os.path.isdir(os.path.join(proj_dir, sample)):
            samples = [sample]
        else:
            raise SystemExit(
                "Given sample {} is not found in project directory {}".format(
                    sample, proj_dir))
    else:
        samples = find_samples(proj_dir)

    for sam in samples:
        sam_dir = os.path.join(proj_dir, sam)
        src_dir = os.path.join(sam_dir, 'scripts')
        # Create the directories only when missing, but always write and
        # submit the job; previously an existing directory silently skipped
        # the sample.
        if not os.path.exists(src_dir):
            os.mkdir(src_dir)
        align_dir = os.path.join(sam_dir, "alignment_{}".format(aligner),
                                 "bam_files")
        if not os.path.exists(align_dir):
            os.makedirs(align_dir)
        job_file = os.path.join(src_dir, "{}_{}.sh".format(sam, aligner))
        with open(job_file, 'w') as jb_fl:
            jb_fl.write(
                align_template.format(sam=sam,
                                      sam_dir=sam_dir,
                                      align_dir=align_dir,
                                      align_index=align_index,
                                      align_gtf=align_gtf))
        if bam_to_bed:
            run_b2b(project=project,
                    aligner=aligner,
                    slurm=True,
                    sample=sam,
                    job_file=job_file)
        subprocess.check_call(['sbatch', job_file])