Exemple #1
0
 def bwa_align(self, inputs, output, ref_fasta, input_path, output_path, sm,
               extra_options):
     '''Align fastq files with BWA mem and write the alignments as BAM.

     inputs        -- a single fastq path, or a list of fastq paths
                      (paired-end) that is joined with spaces
     output        -- path of the BAM file written by samtools view
     ref_fasta     -- reference passed to bwa mem
     input_path    -- its basename is used as the ID, LB and PU fields of
                      the read group
     output_path   -- directory receiving the "<sm>.bwa.log" stderr log
     sm            -- sample name, used for the SM field and the log name
     extra_options -- text inserted verbatim into the bwa mem command line

     The command is run through run_stage under the "alignment" stage.
     '''
     safe_make_dir(os.path.dirname(output))
     cores = self.get_stage_options("alignment", "cores")
     # If PE fastq inputs, join into a string
     if isinstance(inputs, list):
         fastq_input = " ".join(inputs)
     else:
         fastq_input = inputs
     lb = os.path.basename(input_path)
     log_file = os.path.join(output_path, "{sm}.bwa.log".format(sm=sm))
     # The field separators must reach bwa as the two-character escape
     # (backslash + t), which bwa expands itself (see -R in the bwa manual).
     # Literal tab characters are reportedly rejected by recent bwa releases,
     # so they are never emitted. The escape survives both quoting layers:
     # single quotes for the outer shell, double quotes inside bash.
     read_group_string = (
         "@RG\\tID:{lb}\\tLB:{lb}\\tPU:{lb}\\tPL:ILLUMINA\\tSM:{sm}"
     ).format(lb=lb, sm=sm)
     # pipefail makes a bwa failure fail the whole pipeline instead of
     # being masked by the exit status of samtools view.
     command = 'set -o pipefail; bwa mem -t {cores} {extra_options} ' \
               '-R "{rg}" {ref_fasta} {fastq_input} 2> {log_file} | ' \
               'samtools view -b - > {output}'.format(
                       cores=cores, extra_options=extra_options,
                       rg=read_group_string, ref_fasta=ref_fasta,
                       fastq_input=fastq_input, output=output,
                       log_file=log_file)
     # Use bash instead of sh (pipefail is a bash option)
     command = "bash -c '{}'".format(command)
     run_stage(self.state, "alignment", command)
Exemple #2
0
 def bwa_index(self, input, outputs, reference_dir):
     '''Symlink the reference fasta into reference_dir and index it with bwa.

     The link is always named "reference.fa" and points at the absolute
     path of input; bwa index is then run on the link.
     '''
     safe_make_dir(reference_dir)
     source_fasta = os.path.abspath(input)
     link_path = os.path.join(reference_dir, "reference.fa")
     # Only index if the symlink could be created (shell "&&")
     link_step = "ln -sf {} {}".format(source_fasta, link_path)
     index_step = "bwa index {}".format(link_path)
     command = link_step + " && " + index_step
     run_stage(self.state, "build_index", command)
Exemple #3
0
 def gstacks(self, inputs, output, input_dir, output_dir, aligner_name,
             final_bam_name, sample_list, gstacks_options):
     '''Write a population map for sample_list and run gstacks.

     The popmap is written to "popmap.txt" in output_dir with every sample
     assigned to population "1". The BAM suffix handed to gstacks (-S) is
     derived from aligner_name and final_bam_name.
     '''
     safe_make_dir(output_dir)
     # One "<sample><TAB>1" row per sample, no trailing newline
     popmap_filename = os.path.join(output_dir, "popmap.txt")
     with open(popmap_filename, "w") as popmap_handle:
         popmap_handle.write(
             "\n".join("{}\t1".format(sample) for sample in sample_list))
     cores = self.get_stage_options("gstacks", "cores")
     # "bwa_mem" is shortened to "bwa"; any other aligner name is used as is
     aligner_tag = "bwa" if aligner_name == "bwa_mem" else aligner_name
     # Filtered BAMs carry an extra ".filtered" component
     bam_ending = ".filtered.bam" if final_bam_name == "filter_bam" else ".bam"
     suffix = ".{}.sorted{}".format(aligner_tag, bam_ending)
     template = "gstacks -t {0} -M {1} -I {2} -S {3} {4} -O {5}"
     command = template.format(cores, popmap_filename, input_dir, suffix,
                               gstacks_options, output_dir)
     run_stage(self.state, "gstacks", command)
Exemple #4
0
 def bowtie_align(self, inputs, output, index_base, input_path, output_path,
                  sm, extra_options):
     '''Align fastq files with Bowtie and write the alignments as BAM.

     inputs is a single gzipped fastq path or a list of exactly two
     (paired-end). The basename of input_path fills the ID, LB and PU
     read-group fields, sm fills SM, and Bowtie's stderr goes to
     "<sm>.bowtie.log" in output_path.
     '''
     safe_make_dir(os.path.dirname(output))
     cores = self.get_stage_options("alignment", "cores")
     # Bowtie doesn't accept gzipped files, so each fastq is streamed
     # through zcat using bash process substitution
     if not isinstance(inputs, list):
         fastq_input = "<(zcat {})".format(inputs)
     else:
         assert (len(inputs) == 2)
         fastq_input = "-1 <(zcat {r1}) -2 <(zcat {r2})".format(
             r1=inputs[0], r2=inputs[1])
     lb = os.path.basename(input_path)
     log_file = os.path.join(output_path, "{sm}.bowtie.log".format(sm=sm))
     # One --sam-RG flag per read-group field
     rg_fields = [
         "ID:{}".format(lb),
         "LB:{}".format(lb),
         "PU:{}".format(lb),
         "PL:ILLUMINA",
         "SM:{}".format(sm),
     ]
     rg_flags = " ".join("--sam-RG " + field for field in rg_fields)
     template = ("set -o pipefail; bowtie --threads {cores} --sam "
                 "{extra_options} {rg_flags} {index_base} {fastq} "
                 "2> {log_file} | samtools view -b - > {output}")
     command = template.format(
         cores=cores, extra_options=extra_options, rg_flags=rg_flags,
         index_base=index_base, fastq=fastq_input, log_file=log_file,
         output=output)
     # Use bash instead of sh
     command = 'bash -c "{}"'.format(command)
     run_stage(self.state, "alignment", command)
Exemple #5
0
 def sort_bam(self, input, output):
     '''Sort BAM file by coordinates, then index the sorted file'''
     cores = self.get_stage_options("sort_bam", "cores")
     stage_mem = self.get_stage_options("sort_bam", "mem")
     # Per-thread memory for samtools sort -m: the stage memory divided by
     # the core count, minus one, but never less than 1
     mem = floor(stage_mem / cores) - 1
     if mem < 1:
         mem = 1
     sort_step = "samtools sort -@ {cores} -m {mem}G -o {output} {input}".format(
         cores=cores, mem=mem, output=output, input=input)
     index_step = "samtools index {output}".format(output=output)
     command = sort_step + " && " + index_step
     run_stage(self.state, "sort_bam", command)
Exemple #6
0
 def process_radtags(self, inputs, output, output_dir, lib, re_1, re_2,
                     extra_options):
     '''Demultiplex a library into per-sample fastq files with process_radtags.

     inputs holds the read 1 fastq, the read 2 fastq and the barcodes file,
     in that order. Results go to a directory named after lib inside
     output_dir, and output is touched as a success marker once
     process_radtags exits cleanly.
     '''
     lib_output_dir = os.path.join(output_dir, lib)
     safe_make_dir(lib_output_dir)
     read1 = inputs[0]
     read2 = inputs[1]
     barcodes_file = inputs[2]
     demux_step = (
         "process_radtags -1 {0} -2 {1} -b {2} -i gzfastq -o {3} "
         "--inline_inline --renz_1 {4} --renz_2 {5} {6}"
     ).format(read1, read2, barcodes_file, lib_output_dir, re_1, re_2,
              extra_options)
     # The marker file is only created when process_radtags succeeds
     marker_step = "touch {}".format(output)
     command = demux_step + " && " + marker_step
     run_stage(self.state, "process_radtags", command)
Exemple #7
0
 def fastqc(self, input, outputs, fastqc_dir, lib):
     '''Run FastQC on a library's fastq files.

     input must be a list whose last element is the barcode file; every
     other element is passed to FastQC. Reports are written to a directory
     named after lib inside fastqc_dir.
     '''
     safe_make_dir(fastqc_dir)
     lib_report_dir = os.path.join(fastqc_dir, lib)
     safe_make_dir(lib_report_dir)
     assert (isinstance(input, list))
     # Drop the trailing barcode file before building the argument list
     fastq_args = " ".join(input[:-1])
     command = "fastqc -o {0} -f fastq {1}".format(lib_report_dir, fastq_args)
     run_stage(self.state, "fastqc", command)
Exemple #8
0
 def populations(self, output, gstacks_dir, populations_dir, popmap_file,
                 populations_options):
     '''Run Stacks populations and request VCF output.

     Results are written to the directory containing output. The -r value
     is parsed from that directory's name: the text after its last
     underscore, with the first character removed.
     '''
     output_dir = os.path.dirname(output)
     safe_make_dir(populations_dir)
     safe_make_dir(output_dir)
     cores = self.get_stage_options("populations", "cores")
     # Keep a copy of the popmap next to the results; the command itself is
     # still given the original popmap_file
     shutil.copyfile(popmap_file, os.path.join(output_dir, "popmap.txt"))
     # Get r value
     trailing_token = output_dir.rsplit("_", 1)[-1]
     r = trailing_token[1:]
     template = "populations -P {0} -O {1} -t {2} -M {3} -r {4} --vcf {5}"
     command = template.format(gstacks_dir, output_dir, cores, popmap_file,
                               r, populations_options)
     run_stage(self.state, "populations", command)
Exemple #9
0
def run_java(state, stage, jar_path, mem, args):
    '''Build a command with java_command and run it as the given stage'''
    run_stage(state, stage, java_command(jar_path, mem, args))
Exemple #10
0
 def run(self, name, command):
     '''Run command under the stage called name.

     Delegates to run_stage, passing this object's state.
     '''
     run_stage(self.state, name, command)
Exemple #11
0
 def multiqc_fastqc(self, input, output, qc_dir, fastqc_dir):
     '''Summarise the FastQC directory with MultiQC.

     Only the fastqc module is enabled; the report is written to qc_dir
     under the file name "multiqc_fastqc".
     '''
     template = ("multiqc --module fastqc --outdir {0} "
                 "--filename multiqc_fastqc {1}")
     command = template.format(qc_dir, fastqc_dir)
     run_stage(self.state, "multiqc", command)
Exemple #12
0
 def multiqc_flagstat(self, input, output, qc_dir, flagstat_dir):
     '''Summarise the flagstat directory with MultiQC.

     Only the samtools module is enabled; the report is written to qc_dir
     under the file name "multiqc_flagstat".
     '''
     template = ("multiqc --module samtools --outdir {0} "
                 "--filename multiqc_flagstat {1}")
     command = template.format(qc_dir, flagstat_dir)
     run_stage(self.state, "multiqc", command)
Exemple #13
0
 def flagstat(self, input, output):
     '''Write Samtools flagstat statistics for a final BAM file to output'''
     report_dir = os.path.dirname(output)
     safe_make_dir(report_dir)
     # flagstat prints to stdout, so the report is captured by redirection
     command = "samtools flagstat {0} > {1}".format(input, output)
     run_stage(self.state, "flagstat", command)
Exemple #14
0
 def filter_bam(self, input, output, extra_options):
     '''Filter a BAM file with Samtools view, then index the filtered file.

     extra_options is inserted verbatim into the samtools view command.
     '''
     filter_step = "samtools view -b {0} {1} > {2}".format(
         extra_options, input, output)
     # Index only when the filtering step succeeded
     index_step = "samtools index {0}".format(output)
     command = filter_step + " && " + index_step
     run_stage(self.state, "filter_bam", command)