def pca_rnaseq(self, counts_table_file):
    """Run the external ``pca.R`` script on a counts table and register its PDF.

    The script is invoked as ``pca.R <counts_table_file> <prefix> rpkm`` where
    ``<prefix>`` is a freshly generated unique file name. On success the file
    ``<prefix>.pdf`` is added to ``self.ex`` with a 'pca.pdf' description.
    Every skip or failure path only writes a debug message and returns None.

    :param counts_table_file: (str) path of the counts table given to ``pca.R``.
    :rtype: None
    """
    @program
    def pca(counts_table_file):
        prefix = unique_filename_in()
        command = ['pca.R', counts_table_file, prefix, "rpkm"]
        return {"arguments": command, "return_value": prefix}

    # Guard: nothing to do when the R script is not available.
    if not program_exists('pca.R'):
        self.write_debug("Skipped PCA: pca.R not found.")
        return None

    try:
        self.write_log("* PCA")
        pending = pca.nonblocking(self.ex, counts_table_file, via=self.via)
        result_prefix = pending.wait()
    except Exception as err:
        self.write_debug("PCA failed: %s." % str(err))
        return None

    if result_prefix is None:
        self.write_debug("PCA failed.")
        return None

    pdf_description = set_file_descr('pca.pdf', type='pdf', step='pca', ucsc=0)
    self.ex.add(result_prefix + '.pdf', description=pdf_description)
def _run_tool(self, tool_name, args):
    """Run an external program and raise if it reports a failure.

    The program's stderr is captured; stdout is not redirected.

    :param tool_name: (str) name of the executable, checked with ``program_exists``.
    :param args: (list) command-line arguments appended after *tool_name*.
    :raises OSError: if the program is not found, if it wrote anything to
        stderr, or if it exited with a non-zero status.
    """
    if not program_exists(tool_name):
        raise OSError("Program not found in $PATH: %s" % tool_name)
    proc = subprocess.Popen([tool_name] + args, stderr=subprocess.PIPE)
    # stdout is not piped, so the first element of the pair is always None.
    _, stderr = proc.communicate()
    if stderr:
        raise OSError("%s exited with message: %s" % (tool_name, stderr))
    # A program can fail without writing to stderr; do not report that as success.
    if proc.returncode != 0:
        raise OSError("%s exited with status %d" % (tool_name, proc.returncode))
def differential_analysis(self, filename):
    """Launch an analysis of differential expression on the count values.

    Runs the external ``limma.R`` script on *filename* and collects the files
    it produced in the execution's working directory.

    The analysis is skipped (returning None) when ``limma.R`` is not found,
    when *filename* is None, or when ``self.conditions`` has fewer than two
    entries. Failures of the script are written to the debug log and also
    yield None.

    :param filename: (str) path of the counts file passed to ``limma.R``.
    :returns: list of the entries of ``self.ex.working_directory`` whose name
        contains the output prefix, or None if skipped or failed.
    """
    @program
    def run_DE(data_file):
        """Run limma.R on *data_file*."""
        output_file = unique_filename_in()
        # NOTE(review): "\t" is a Python escape, so the value passed after
        # "-s" is $'<TAB>' with a literal tab character - confirm limma.R
        # expects that form.
        arguments = ["limma.R", data_file, "-s", "$'\t'", "-o", output_file]
        return {'arguments': arguments, 'return_value': output_file}

    if not program_exists('limma.R'):
        # The message now names the script that is actually checked.
        self.write_debug("Skipped DE analysis: limma.R not found.")
        return
    if filename is None:
        self.write_log("  Skipped differential analysis: empty counts file.")
        return
    ncond = len(self.conditions)
    if ncond < 2:
        self.write_log("  Skipped differential analysis: less than two groups.")
        return
    self.write_log("* Differential analysis")
    try:
        de_file = run_DE.nonblocking(self.ex, filename, via=self.via).wait()
    except Exception as err:
        self.write_debug("DE analysis failed with error: %s." % str(err))
        return
    if de_file is None:
        self.write_debug("DE analysis failed (see bein 'program' table).")
        return
    # Test for an Exception value *before* using `de_file` as a substring:
    # `exception in "name"` raises TypeError and would hide the real failure.
    if isinstance(de_file, Exception):
        output_files = []
    else:
        output_files = [f for f in os.listdir(self.ex.working_directory)
                        if de_file in f]
    if not output_files:
        self.write_debug("Skipped differential analysis: `de_file` has value %s." % str(de_file))
        return
    self.write_log("  ....done.")
    return output_files
def pca_rnaseq(self, counts_table_file):
    """Run the external ``pca.R`` script on a counts table and register its PDF.

    The script is invoked as ``pca.R <counts_table_file> <prefix> rpkm`` where
    ``<prefix>`` is a freshly generated unique file name. On success the file
    ``<prefix>.pdf`` is added to ``self.ex`` with a 'pca.pdf' description.
    Every skip or failure path only writes a debug message and returns None.

    :param counts_table_file: (str) path of the counts table given to ``pca.R``.
    :rtype: None
    """
    @program
    def pca(counts_table_file):
        prefix = unique_filename_in()
        command = ['pca.R', counts_table_file, prefix, "rpkm"]
        return {"arguments": command, "return_value": prefix}

    # Guard: nothing to do when the R script is not available.
    if not program_exists('pca.R'):
        self.write_debug("Skipped PCA: pca.R not found.")
        return None

    try:
        self.write_log("* PCA")
        pending = pca.nonblocking(self.ex, counts_table_file, via=self.via)
        result_prefix = pending.wait()
    except Exception as err:
        self.write_debug("PCA failed: %s." % str(err))
        return None

    if result_prefix is None:
        self.write_debug("PCA failed.")
        return None

    pdf_description = set_file_descr('pca.pdf', type='pdf', step='pca', ucsc=0)
    self.ex.add(result_prefix + '.pdf', description=pdf_description)
def differential_analysis(self, filename):
    """Launch an analysis of differential expression on the count values.

    Runs the external ``limma.R`` script on *filename* and collects the files
    it produced in the execution's working directory.

    The analysis is skipped (returning None) when ``limma.R`` is not found,
    when *filename* is None, or when ``self.conditions`` has fewer than two
    entries. Failures of the script are written to the debug log and also
    yield None.

    :param filename: (str) path of the counts file passed to ``limma.R``.
    :returns: list of the entries of ``self.ex.working_directory`` whose name
        contains the output prefix, or None if skipped or failed.
    """
    @program
    def run_DE(data_file):
        """Run limma.R on *data_file*."""
        output_file = unique_filename_in()
        # NOTE(review): "\t" is a Python escape, so the value passed after
        # "-s" is $'<TAB>' with a literal tab character - confirm limma.R
        # expects that form.
        arguments = ["limma.R", data_file, "-s", "$'\t'", "-o", output_file]
        return {'arguments': arguments, 'return_value': output_file}

    if not program_exists('limma.R'):
        # The message now names the script that is actually checked.
        self.write_debug("Skipped DE analysis: limma.R not found.")
        return
    if filename is None:
        self.write_log("  Skipped differential analysis: empty counts file.")
        return
    ncond = len(self.conditions)
    if ncond < 2:
        self.write_log("  Skipped differential analysis: less than two groups.")
        return
    self.write_log("* Differential analysis")
    try:
        de_file = run_DE.nonblocking(self.ex, filename, via=self.via).wait()
    except Exception as err:
        self.write_debug("DE analysis failed with error: %s." % str(err))
        return
    if de_file is None:
        self.write_debug("DE analysis failed (see bein 'program' table).")
        return
    # Test for an Exception value *before* using `de_file` as a substring:
    # `exception in "name"` raises TypeError and would hide the real failure.
    if isinstance(de_file, Exception):
        output_files = []
    else:
        output_files = [f for f in os.listdir(self.ex.working_directory)
                        if de_file in f]
    if not output_files:
        self.write_debug("Skipped differential analysis: `de_file` has value %s." % str(de_file))
        return
    self.write_log("  ....done.")
    return output_files
def find_junctions(self, soapsplice_index=None, path_to_soapsplice=None, soapsplice_options=None):
    """
    Retrieve unmapped reads from a precedent mapping and run SOAPsplice on them.

    For every group of ``self.job.groups`` that has paired unmapped fastq
    files, the R1 files and the R2 files are concatenated, SOAPsplice is run
    on the pair, the resulting ``.junc`` file is converted to a bed track and
    the ``.sam`` file to a bam file; both are added to ``self.ex``.

    Options found in ``self.job.options['soapsplice_options']`` override the
    ones given in *soapsplice_options*; ``-p`` defaults to 16 and ``-q`` to 1.
    The dictionary passed by the caller is never modified.

    :param soapsplice_index: (str) path to the SOAPsplice index.
        Defaults to ``self.assembly.index_path``.
    :param path_to_soapsplice: (str) specify the path to the program if it is not in your $PATH.
    :param soapsplice_options: (dict) SOAPsplice options, e.g. {'-m':2}.
    :returns: ``(bed, bam)`` as assigned for the last processed group; either
        item is None if it was never produced. A bare None is returned when
        soapsplice is unavailable or a SOAPsplice run fails.
    """
    @program
    def soapsplice(unmapped_R1, unmapped_R2, index, output=None, path_to_soapsplice=None, options=None):
        """Bind 'soapsplice'. Return a text file containing the list of junctions.

        :param unmapped_R1: (str) path to the fastq file containing the 'left' reads.
        :param unmapped_R2: (str) path to the fastq file containing the 'right' reads.
        :param index: (str) path to the SOAPsplice index.
        :param output: (str) output file name.
        :param path_to_soapsplice: (str) path to the SOAPsplice executable.
            If not specified, the program must be in your $PATH.
        :param options: (dict) SOAPsplice options, given as {opt: value}.
        :rtype: str

        Main options::

        -p: number of threads, <= 20. [1]
        -S: 1: forward strand, 2: reverse strand, 3: both. [3]
        -m: maximum mismatch for one-segment alignment, <= 5. [3]
        -g: maximum indel for one-segment alignment, <= 2. [2]
        -i: length of tail that can be ignored in one-segment alignment. [7]
        -t: longest gap between two segments in two-segment alignment. [500000]
        -a: shortest length of a segment in two-segment alignment. [8]
        -q: input quality type in FASTQ file (0: old Illumina, 1: Sanger). [0]
        -L: maximum distance between paired-end reads. [500000]
        -l: minimum distance between paired-end reads. [50]
        -I: insert length of paired-end reads.
        """
        if not output:
            output = unique_filename_in()
        path_to_soapsplice = path_to_soapsplice or 'soapsplice'
        args = [path_to_soapsplice, '-d', index, '-1', unmapped_R1,
                '-2', unmapped_R2, '-o', output, '-f', '2']
        opts = []
        for k, v in (options or {}).iteritems():
            opts.extend([str(k), str(v)])
        return {"arguments": args + opts, "return_value": output}

    # An explicit executable path means the program need not be in $PATH,
    # so only look it up when no path was supplied.
    if not path_to_soapsplice and not program_exists('soapsplice'):
        self.write_debug("Skipped junctions search: soapsplice not found.")
        return
    self.assembly.set_index_path(intype=3)
    soapsplice_index = soapsplice_index or self.assembly.index_path
    # Work on a private copy: the update/setdefault calls below must not leak
    # into the caller's dict nor persist from one call to the next.
    soapsplice_options = dict(soapsplice_options or {})
    soapsplice_options.update(self.job.options.get('soapsplice_options', {}))
    soapsplice_options.setdefault('-p', 16)  # number of threads
    soapsplice_options.setdefault('-q', 1)   # Sanger format
    # Pre-bind the results so the final return cannot hit an unbound name
    # when no group qualifies or the BAM conversion fails.
    bed = bam = None
    unmapped_fastq = {}
    for gid, group in self.job.groups.iteritems():
        unmapped_fastq[gid] = []
        for rid, _run in group['runs'].iteritems():
            unmapped = self.job.files[gid][rid].get('unmapped_fastq')
            if not unmapped:
                self.write_log("No unmapped reads found for group %s, run %d. Skip." % (gid, rid))
                continue
            elif not isinstance(unmapped, tuple):
                self.write_log("Pair-end reads required. Skip.")
                continue
            unmapped_fastq[gid].append(unmapped)
        pairs = unmapped_fastq[gid]
        if len(pairs) == 0:
            continue
        # Concatenate all first mates together, and all second mates together.
        R1 = cat(tuple(pair[0] for pair in pairs))
        R2 = cat(tuple(pair[1] for pair in pairs))
        future = soapsplice.nonblocking(self.ex, R1, R2, soapsplice_index,
                                        path_to_soapsplice=path_to_soapsplice,
                                        options=soapsplice_options,
                                        via=self.via, memory=8,
                                        threads=soapsplice_options['-p'])
        try:
            template = future.wait()
        except Exception as err:
            self.write_debug("SOAPsplice failed: %s." % str(err))
            return
        if template is None:
            self.write_debug("SOAPsplice failed.")
            return
        junc_file = template + '.junc'
        bed = self.convert_junc_file(junc_file, self.assembly)
        bed_descr = set_file_descr('junctions_%s.bed' % group['name'],
                                   groupId=gid, type='bed', step='junctions', ucsc=1)
        bam_descr = set_file_descr('junctions_%s.bam' % group['name'],
                                   groupId=gid, type='bam', step='junctions', ucsc=0)
        sam = template + '.sam'
        try:
            bam = sam_to_bam(self.ex, sam, reheader=self.assembly.name)
            # NOTE(review): the same description is passed to both calls
            # below - confirm the BAM is not registered twice.
            add_and_index_bam(self.ex, bam, description=bam_descr)
            self.ex.add(bam, description=bam_descr)
        except Exception as e:
            # Best effort: a failed conversion must not prevent adding the bed.
            self.write_debug("%s\n(Qualities may be in the wrong format, try with '-q 0'.)" % str(e))
        self.ex.add(bed, description=bed_descr)
    return bed, bam
def find_junctions(self, soapsplice_index=None, path_to_soapsplice=None, soapsplice_options=None):
    """
    Retrieve unmapped reads from a precedent mapping and run SOAPsplice on them.

    For every group of ``self.job.groups`` that has paired unmapped fastq
    files, the R1 files and the R2 files are concatenated, SOAPsplice is run
    on the pair, the resulting ``.junc`` file is converted to a bed track and
    the ``.sam`` file to a bam file; both are added to ``self.ex``.

    Options found in ``self.job.options['soapsplice_options']`` override the
    ones given in *soapsplice_options*; ``-p`` defaults to 16 and ``-q`` to 1.
    The dictionary passed by the caller is never modified.

    :param soapsplice_index: (str) path to the SOAPsplice index.
        Defaults to ``self.assembly.index_path``.
    :param path_to_soapsplice: (str) specify the path to the program if it is not in your $PATH.
    :param soapsplice_options: (dict) SOAPsplice options, e.g. {'-m':2}.
    :returns: ``(bed, bam)`` as assigned for the last processed group; either
        item is None if it was never produced. A bare None is returned when
        soapsplice is unavailable or a SOAPsplice run fails.
    """
    @program
    def soapsplice(unmapped_R1, unmapped_R2, index, output=None, path_to_soapsplice=None, options=None):
        """Bind 'soapsplice'. Return a text file containing the list of junctions.

        :param unmapped_R1: (str) path to the fastq file containing the 'left' reads.
        :param unmapped_R2: (str) path to the fastq file containing the 'right' reads.
        :param index: (str) path to the SOAPsplice index.
        :param output: (str) output file name.
        :param path_to_soapsplice: (str) path to the SOAPsplice executable.
            If not specified, the program must be in your $PATH.
        :param options: (dict) SOAPsplice options, given as {opt: value}.
        :rtype: str

        Main options::

        -p: number of threads, <= 20. [1]
        -S: 1: forward strand, 2: reverse strand, 3: both. [3]
        -m: maximum mismatch for one-segment alignment, <= 5. [3]
        -g: maximum indel for one-segment alignment, <= 2. [2]
        -i: length of tail that can be ignored in one-segment alignment. [7]
        -t: longest gap between two segments in two-segment alignment. [500000]
        -a: shortest length of a segment in two-segment alignment. [8]
        -q: input quality type in FASTQ file (0: old Illumina, 1: Sanger). [0]
        -L: maximum distance between paired-end reads. [500000]
        -l: minimum distance between paired-end reads. [50]
        -I: insert length of paired-end reads.
        """
        if not output:
            output = unique_filename_in()
        path_to_soapsplice = path_to_soapsplice or 'soapsplice'
        args = [path_to_soapsplice, '-d', index, '-1', unmapped_R1,
                '-2', unmapped_R2, '-o', output, '-f', '2']
        opts = []
        for k, v in (options or {}).iteritems():
            opts.extend([str(k), str(v)])
        return {"arguments": args + opts, "return_value": output}

    # An explicit executable path means the program need not be in $PATH,
    # so only look it up when no path was supplied.
    if not path_to_soapsplice and not program_exists('soapsplice'):
        self.write_debug("Skipped junctions search: soapsplice not found.")
        return
    self.assembly.set_index_path(intype=3)
    soapsplice_index = soapsplice_index or self.assembly.index_path
    # Work on a private copy: the update/setdefault calls below must not leak
    # into the caller's dict nor persist from one call to the next.
    soapsplice_options = dict(soapsplice_options or {})
    soapsplice_options.update(self.job.options.get('soapsplice_options', {}))
    soapsplice_options.setdefault('-p', 16)  # number of threads
    soapsplice_options.setdefault('-q', 1)   # Sanger format
    # Pre-bind the results so the final return cannot hit an unbound name
    # when no group qualifies or the BAM conversion fails.
    bed = bam = None
    unmapped_fastq = {}
    for gid, group in self.job.groups.iteritems():
        unmapped_fastq[gid] = []
        for rid, _run in group['runs'].iteritems():
            unmapped = self.job.files[gid][rid].get('unmapped_fastq')
            if not unmapped:
                self.write_log("No unmapped reads found for group %s, run %d. Skip." % (gid, rid))
                continue
            elif not isinstance(unmapped, tuple):
                self.write_log("Pair-end reads required. Skip.")
                continue
            unmapped_fastq[gid].append(unmapped)
        pairs = unmapped_fastq[gid]
        if len(pairs) == 0:
            continue
        # Concatenate all first mates together, and all second mates together.
        R1 = cat(tuple(pair[0] for pair in pairs))
        R2 = cat(tuple(pair[1] for pair in pairs))
        future = soapsplice.nonblocking(self.ex, R1, R2, soapsplice_index,
                                        path_to_soapsplice=path_to_soapsplice,
                                        options=soapsplice_options,
                                        via=self.via, memory=8,
                                        threads=soapsplice_options['-p'])
        try:
            template = future.wait()
        except Exception as err:
            self.write_debug("SOAPsplice failed: %s." % str(err))
            return
        if template is None:
            self.write_debug("SOAPsplice failed.")
            return
        junc_file = template + '.junc'
        bed = self.convert_junc_file(junc_file, self.assembly)
        bed_descr = set_file_descr('junctions_%s.bed' % group['name'],
                                   groupId=gid, type='bed', step='junctions', ucsc=1)
        bam_descr = set_file_descr('junctions_%s.bam' % group['name'],
                                   groupId=gid, type='bam', step='junctions', ucsc=0)
        sam = template + '.sam'
        try:
            bam = sam_to_bam(self.ex, sam, reheader=self.assembly.name)
            # NOTE(review): the same description is passed to both calls
            # below - confirm the BAM is not registered twice.
            add_and_index_bam(self.ex, bam, description=bam_descr)
            self.ex.add(bam, description=bam_descr)
        except Exception as e:
            # Best effort: a failed conversion must not prevent adding the bed.
            self.write_debug("%s\n(Qualities may be in the wrong format, try with '-q 0'.)" % str(e))
        self.ex.add(bed, description=bed_descr)
    return bed, bam
def _run_tool(self, tool_name, args):
    """Run an external program and raise if it reports a failure.

    The program's stderr is captured; stdout is not redirected.

    :param tool_name: (str) name of the executable, checked with ``program_exists``.
    :param args: (list) command-line arguments appended after *tool_name*.
    :raises OSError: if the program is not found, if it wrote anything to
        stderr, or if it exited with a non-zero status.
    """
    if not program_exists(tool_name):
        raise OSError("Program not found in $PATH: %s" % tool_name)
    proc = subprocess.Popen([tool_name] + args, stderr=subprocess.PIPE)
    # stdout is not piped, so the first element of the pair is always None.
    _, stderr = proc.communicate()
    if stderr:
        raise OSError("%s exited with message: %s" % (tool_name, stderr))
    # A program can fail without writing to stderr; do not report that as success.
    if proc.returncode != 0:
        raise OSError("%s exited with status %d" % (tool_name, proc.returncode))