Exemple #1
0
 def hisat_align(self, inputs, output, ref_basename, sample):
     '''Align fastq files with HISAT2 and convert the alignments to BAM.

     The HISAT2 SAM stream is piped through ``samtools view -bS`` into
     ``output``; stderr of both tools is collected in a log file next to
     the BAM. No sorting is done here.

     inputs       -- fastq path(s). For paired-end runs the first two items
                     are used as R1 and R2; for single-end runs either one
                     path or a list/tuple of paths.
     output       -- path of the BAM file to write.
     ref_basename -- basename of the HISAT2 index, passed to ``-x``.
     sample       -- key into ``self.experiment.tr_dict`` used to look up
                     the read-group fields (id, lane, sample_name, library).
     '''
     cores = self.get_stage_options("hisat", "cores")
     safe_make_dir(os.path.dirname(output))
     # Escape the dot so only a literal ".bam" suffix is swapped for ".log"
     output_log = re.sub(r"\.bam$", ".log", output)
     if output_log == output:
         # No ".bam" suffix to replace: never let the stderr redirects
         # write into the alignment file itself.
         output_log = output + ".log"
     # If PE fastq inputs, use hisat -1 and -2 arguments, else use -U
     if self.paired_end:
         fastq_input = "-1 {fastq_R1} -2 {fastq_R2}".format(
             fastq_R1=inputs[0], fastq_R2=inputs[1])
     else:
         # hisat2 -U takes a comma-separated list, so join sequences rather
         # than formatting their Python repr into the shell command.
         if isinstance(inputs, (list, tuple)):
             single_end = ",".join(inputs)
         else:
             single_end = inputs
         fastq_input = "-U {fastq}".format(fastq=single_end)
     # Only pass --rna-strandness for the values listed here
     if self.experiment.stranded in ["FR", "RF", "F", "R"]:
         stranded = "--rna-strandness {}".format(self.experiment.stranded)
     else:
         stranded = ""
     # Get RG information
     info = self.experiment.tr_dict[sample]
     command = "hisat2 -p {n_threads} --dta {stranded} " \
               "--rg-id {sm}_{id}_{ln} --rg SM:{sm} " \
               "--rg LB:{lb} --rg PL:Illumina -x {ref_basename} " \
               "{fastq_input} 2> {output_log} | samtools view -bS - > " \
               "{output_bam} 2>> {output_log}" \
               "".format(n_threads=cores, stranded=stranded, id=info.id,
                       ln=info.lane, sm=info.sample_name, lb=info.library,
                       ref_basename=ref_basename, fastq_input=fastq_input,
                       output_bam=output, output_log=output_log)
     run_stage(self.state, "hisat", command)
Exemple #2
0
 def stringtie_prepDE(self, inputs, outputs):
     '''Build the gene and transcript count matrices with prepDE.py.

     inputs  -- nested sequence; the path at inputs[0][0] locates the
                directory handed to prepDE.py (two levels above that file).
     outputs -- outputs[0] is the gene matrix path, outputs[1] the
                transcript matrix path.
     '''
     first_file = inputs[0][0]
     # prepDE.py is pointed at the grandparent directory of the first input
     parent_of_samples = os.path.dirname(os.path.dirname(first_file))
     gene_matrix = outputs[0]
     transcript_matrix = outputs[1]
     template = ("prepDE.py -i {input_dir} -g {gene_matrix} "
                 "-t {transcript_matrix} -l 75")
     command = template.format(
         input_dir=parent_of_samples,
         gene_matrix=gene_matrix,
         transcript_matrix=transcript_matrix)
     run_stage(self.state, "stringtie", command)
Exemple #3
0
 def sort_bam_by_name(self, input, output):
     '''Name-sort a BAM file with samtools.

     input  -- sequence whose first item is the BAM to sort.
     output -- path the sorted BAM is redirected to.
     '''
     requested_mem = int(self.get_stage_options("samtools", "mem"))
     # Stay 2G under the stage's memory setting, but never below 1G
     sort_mem = max(requested_mem - 2, 1)
     bam_path = input[0]
     template = "samtools sort -n -m {mem}G {input} > {output}"
     command = template.format(mem=sort_mem, input=bam_path, output=output)
     run_stage(self.state, "samtools", command)
Exemple #4
0
 def fastqc(self, input, outputs, fastqc_dir):
     '''Run FastQC on one or more fastq files.

     input      -- a single fastq path, or a list/tuple of paths.
     outputs    -- not used in the body.
     fastqc_dir -- directory FastQC writes into; created if needed.
     '''
     safe_make_dir(fastqc_dir)
     # Several fastq files are passed to FastQC as space-separated arguments
     if isinstance(input, (tuple, list)):
         fastq_files = " ".join(input)
     else:
         fastq_files = input
     template = "fastqc -o {fastqc_dir} -f fastq {fastq_input}"
     command = template.format(fastqc_dir=fastqc_dir, fastq_input=fastq_files)
     run_stage(self.state, "fastqc", command)
Exemple #5
0
 def stringtie_estimates(self, inputs, outputs):
     '''Estimate expression with stringtie against the reference annotation.

     inputs  -- sequence whose first item is handed to stringtie as input.
     outputs -- sequence whose first item is the GTF path given to ``-o``.
     '''
     alignment = inputs[0]
     estimates_gtf = outputs[0]
     n_cores = self.get_stage_options("stringtie", "cores")
     template = ("stringtie -p {cores} -e -B -G {gtf} -o {output} "
                 "{input}")
     command = template.format(
         cores=n_cores,
         gtf=self.gene_ref,
         output=estimates_gtf,
         input=alignment)
     run_stage(self.state, "stringtie", command)
Exemple #6
0
 def sort_bam_by_coordinate(self, input, outputs):
     '''Coordinate-sort a BAM file with samtools, then index the result.

     input   -- formatted into the command as the BAM to sort.
     outputs -- sequence whose first item is the sorted BAM path.
     '''
     sorted_bam = outputs[0]
     requested_mem = int(self.get_stage_options("samtools", "mem"))
     # Stay 2G under the stage's memory setting, but never below 1G
     sort_mem = max(requested_mem - 2, 1)
     template = ("samtools sort -m {mem}G {input} > {output} && "
                 "samtools index {output}")
     command = template.format(mem=sort_mem, input=input, output=sorted_bam)
     run_stage(self.state, "samtools", command)
Exemple #7
0
 def create_hisat_index(self, inputs, outputs, hisat_basename):
     '''Build a HISAT2 index from the genome fasta.

     inputs         -- a pair (genome fasta, gene GTF).
     outputs        -- not used in the body.
     hisat_basename -- basename of the index files; its directory is
                       created if needed.
     '''
     safe_make_dir(os.path.dirname(hisat_basename))
     # inputs must unpack to exactly two items; the GTF is not part of the
     # hisat2-build command line.
     genome_fa, _gene_gtf = inputs
     n_threads = self.get_stage_options("build_index", "cores")
     template = "hisat2-build -p {n_threads} {genome_fa} {basename}"
     command = template.format(
         n_threads=n_threads,
         genome_fa=genome_fa,
         basename=hisat_basename)
     run_stage(self.state, "build_index", command)
Exemple #8
0
 def create_star_index(self, inputs, outputs, output_dir):
     '''Build a STAR genome index.

     inputs     -- a pair (genome fasta, gene GTF).
     outputs    -- not used in the body.
     output_dir -- directory passed to ``--genomeDir``; created if needed.
     '''
     safe_make_dir(output_dir)
     genome_fa, gene_gtf = inputs
     # NOTE(review): the thread count is read from the "align" stage while
     # the command is run as the "build_index" stage - confirm this is
     # intended.
     n_threads = self.get_stage_options("align", "cores")
     template = ("STAR --runThreadN {n_threads} --runMode genomeGenerate "
                 "--genomeDir {output_dir} --genomeFastaFiles {genome_fa} "
                 "--sjdbGTFfile {gene_gtf}")
     command = template.format(
         n_threads=n_threads,
         output_dir=output_dir,
         genome_fa=genome_fa,
         gene_gtf=gene_gtf)
     run_stage(self.state, "build_index", command)
Exemple #9
0
 def merge_bams(self, inputs, output):
     '''Merge multiple BAM files into one indexed BAM file.

     When there is only one input nothing is merged; the output is
     symlinked to that input instead.

     inputs -- sequence of upstream results. The merge branch reads the
               first item of each element as the BAM path (the remaining
               item is presumably the .bai index - confirm with the caller).
     output -- the merge branch uses output[0] as the merged BAM path.
     '''
     if len(inputs) == 1:
         only_input = inputs[0]
         sequence_types = (list, tuple)
         if (isinstance(only_input, sequence_types)
                 and isinstance(output, sequence_types)):
             # Same shape as the merge branch expects: link file by file
             # instead of handing whole sequences to re_symlink.
             for source, link_name in zip(only_input, output):
                 re_symlink(source, link_name)
         else:
             # Plain paths: keep the original single-link behaviour
             re_symlink(only_input, output)
         return
     # Only the first item of each input (the BAM) is merged
     bam_inputs = " ".join([x[0] for x in inputs])
     bam_output = output[0]
     command = "samtools merge {output} {bam_inputs} && samtools index {output}".format(
         output=bam_output, bam_inputs=bam_inputs)
     run_stage(self.state, "samtools", command)
Exemple #10
0
 def htseq_count(self, input, output):
     '''Count features with htseq-count.

     input  -- BAM passed straight to htseq-count (``--format=bam``,
               ``--order=name``).
     output -- file the counts are redirected to.
     '''
     # Translate the experiment's strandedness code into the value
     # htseq-count takes: "FR" -> yes, "RF" -> reverse, anything else -> no
     library_type = self.experiment.stranded
     stranded = "no"
     if library_type == "FR":
         stranded = "yes"
     elif library_type == "RF":
         stranded = "reverse"
     template = ("htseq-count --format=bam --mode=union --order=name "
                 "--stranded={stranded} {input} {gtf_file} > {output}")
     command = template.format(
         input=input,
         stranded=stranded,
         gtf_file=self.gene_ref,
         output=output)
     run_stage(self.state, "htseq_count", command)
Exemple #11
0
 def star_align(self, inputs, output, ref_dir, sample):
     '''Align fastq files with STAR, writing an unsorted BAM.

     inputs  -- fastq path(s); joined with spaces for paired-end runs,
                otherwise formatted into the command as given.
     output  -- only its directory is used: STAR writes there with the
                prefix "<sample>.star.".
     ref_dir -- STAR genome directory passed to ``--genomeDir``.
     sample  -- sample name used in the output file prefix.
     '''
     target_dir = os.path.dirname(output)
     safe_make_dir(target_dir)
     n_cores = self.get_stage_options("align", "cores")
     # Paired-end reads go on the command line as "R1 R2"
     reads = " ".join(inputs) if self.paired_end else inputs
     template = ("STAR --runThreadN {cores} --genomeDir {ref_dir} "
                 "--readFilesIn {fastq_input} --readFilesCommand zcat "
                 "--outFileNamePrefix {output_dir}/{sample}.star. "
                 "--outSAMtype BAM Unsorted "
                 "--outSAMunmapped Within ")
     command = template.format(
         cores=n_cores,
         ref_dir=ref_dir,
         fastq_input=reads,
         output_dir=target_dir,
         sample=sample)
     run_stage(self.state, 'star', command)
Exemple #12
0
 def run(self, name, command):
     '''Run ``command`` as the stage called ``name`` using this object's state.'''
     pipeline_state = self.state
     run_stage(pipeline_state, name, command)
Exemple #13
0
 def run_java(self, state, stage, jar_path, mem, args):
     '''Build a Java command line via ``self.java_command`` and run it.

     The command is run as ``stage`` with the ``state`` passed in by the
     caller (not ``self.state``).
     '''
     java_cmd = self.java_command(jar_path, mem, args)
     run_stage(state, stage, java_cmd)