Esempio n. 1
0
 def hisat_align(self, inputs, output, ref_basename, sample):
     '''Align fastq files with HISAT2 and convert the alignments to BAM.

     The hisat2 output is piped through "samtools view -bS" into output;
     no sorting is done by this command. stderr of both programs is sent
     to a log file whose path is output with ".bam" replaced by ".log".

     inputs: when self.paired_end is true, a sequence whose first two
         items are the R1 and R2 fastq files; otherwise a single fastq
         path, or a list/tuple of paths (passed comma-separated to -U).
     output: path of the BAM file to write.
     ref_basename: index basename handed to hisat2 -x.
     sample: key into self.experiment.tr_dict; the entry found there
         supplies id, lane, sample_name and library for the read group.
     '''
     cores = self.get_stage_options("hisat", "cores")
     safe_make_dir(os.path.dirname(output))
     # Derive the log path from a literal ".bam" suffix. If the suffix is
     # absent, append ".log" so the log can never be the BAM file itself.
     bam_suffix = ".bam"
     if output.endswith(bam_suffix):
         output_log = output[:-len(bam_suffix)] + ".log"
     else:
         output_log = output + ".log"
     # If PE fastq inputs, use hisat -1 and -2 arguments, else use -U
     if self.paired_end:
         fastq_input = "-1 {fastq_R1} -2 {fastq_R2}".format(
             fastq_R1=inputs[0], fastq_R2=inputs[1])
     else:
         # -U takes a comma-separated list, so join a list/tuple rather
         # than formatting its Python repr into the command line.
         if isinstance(inputs, (list, tuple)):
             inputs = ",".join(inputs)
         fastq_input = "-U {fastq}".format(fastq=inputs)
     # Only pass --rna-strandness for the values listed here
     if self.experiment.stranded in ["FR", "RF", "F", "R"]:
         stranded = "--rna-strandness {}".format(self.experiment.stranded)
     else:
         stranded = ""
     # Get RG information
     info = self.experiment.tr_dict[sample]
     # hisat2 truncates the log (2>), samtools appends to it (2>>)
     command = "hisat2 -p {n_threads} --dta {stranded} " \
               "--rg-id {sm}_{id}_{ln} --rg SM:{sm} " \
               "--rg LB:{lb} --rg PL:Illumina -x {ref_basename} " \
               "{fastq_input} 2> {output_log} | samtools view -bS - > " \
               "{output_bam} 2>> {output_log}" \
               "".format(n_threads=cores, stranded=stranded, id=info.id,
                       ln=info.lane, sm=info.sample_name, lb=info.library,
                       ref_basename=ref_basename, fastq_input=fastq_input,
                       output_bam=output, output_log=output_log)
     run_stage(self.state, "hisat", command)
Esempio n. 2
0
 def fastqc(self, input, outputs, fastqc_dir):
     '''Run FastQC on one or more fastq files, writing into fastqc_dir.

     input: a single fastq path, or a tuple/list of fastq paths.
     outputs: not used by this method.
     fastqc_dir: directory given to fastqc -o; created via safe_make_dir.
     '''
     safe_make_dir(fastqc_dir)
     # Several fastq files are handed to fastqc as space-separated arguments
     if isinstance(input, (tuple, list)):
         fastq_files = " ".join(input)
     else:
         fastq_files = input
     template = "fastqc -o {fastqc_dir} -f fastq {fastq_input}"
     run_stage(self.state, "fastqc",
               template.format(fastqc_dir=fastqc_dir,
                               fastq_input=fastq_files))
Esempio n. 3
0
 def create_hisat_index(self, inputs, outputs, hisat_basename):
     '''Build a HISAT2 index with hisat2-build.

     inputs: a pair (genome fasta, gene GTF); only the fasta is passed
         to hisat2-build.
     outputs: not used by this method.
     hisat_basename: basename of the index; its parent directory is
         created via safe_make_dir.
     '''
     index_dir = os.path.dirname(hisat_basename)
     safe_make_dir(index_dir)
     # Unpacking still requires exactly two items even though the GTF
     # does not appear in the command.
     genome_fa, _gene_gtf = inputs
     n_threads = self.get_stage_options("build_index", "cores")
     template = "hisat2-build -p {n_threads} {genome_fa} {basename}"
     command = template.format(n_threads=n_threads, genome_fa=genome_fa,
                               basename=hisat_basename)
     run_stage(self.state, "build_index", command)
Esempio n. 4
0
 def create_star_index(self, inputs, outputs, output_dir):
     '''Generate a genome index for STAR in output_dir.

     inputs: a pair (genome fasta, gene GTF); the fasta is passed as
         --genomeFastaFiles and the GTF as --sjdbGTFfile.
     outputs: not used by this method.
     output_dir: directory passed as --genomeDir; created via
         safe_make_dir.
     '''
     safe_make_dir(output_dir)
     genome_fa, gene_gtf = inputs
     # Take the thread count from the same stage the command is run
     # under ("build_index"), as create_hisat_index does, instead of the
     # unrelated "align" stage.
     # NOTE(review): presumably run_stage sizes the job from the stage's
     # own options - confirm against run_stage.
     cores = self.get_stage_options("build_index", "cores")
     command = "STAR --runThreadN {n_threads} --runMode genomeGenerate " \
               "--genomeDir {output_dir} --genomeFastaFiles {genome_fa} " \
               "--sjdbGTFfile {gene_gtf}".format(n_threads=cores,
                   output_dir=output_dir, genome_fa=genome_fa,
                   gene_gtf=gene_gtf)
     run_stage(self.state, "build_index", command)
Esempio n. 5
0
 def star_align(self, inputs, output, ref_dir, sample):
     '''Align fastq files with STAR, producing unsorted BAM.

     inputs: when self.paired_end is true, an iterable of fastq paths
         joined with spaces; otherwise used as given for --readFilesIn.
     output: only its parent directory is used; STAR writes files
         prefixed "<that directory>/<sample>.star.".
     ref_dir: directory passed as --genomeDir.
     sample: name used in the output file prefix.
     '''
     star_dir = os.path.dirname(output)
     safe_make_dir(star_dir)
     # NOTE(review): threads come from the "align" stage while the
     # command is run as stage 'star' - confirm this is intended.
     n_threads = self.get_stage_options("align", "cores")
     # Paired-end reads are given to STAR as space-separated files
     reads = " ".join(inputs) if self.paired_end else inputs
     # The final empty item keeps the trailing space of the command line
     template = " ".join([
         "STAR --runThreadN {cores} --genomeDir {ref_dir}",
         "--readFilesIn {fastq_input} --readFilesCommand zcat",
         "--outFileNamePrefix {output_dir}/{sample}.star.",
         "--outSAMtype BAM Unsorted",
         "--outSAMunmapped Within",
         "",
     ])
     command = template.format(cores=n_threads, ref_dir=ref_dir,
                               fastq_input=reads, output_dir=star_dir,
                               sample=sample)
     run_stage(self.state, 'star', command)
Esempio n. 6
0
def make_output_dirs(output_dict):
    '''Call safe_make_dir on every value of output_dict.'''
    for directory in output_dict.values():
        safe_make_dir(directory)