def exportMotifDiscoverySequences(infile, outfile):
    '''Write the sequences used for motif discovery for one interval track.

    The track name is *infile* without its ``_intervals.load`` suffix, so
    the matching ``_intervals`` table has to be available in the database.

    Only the sequences with the highest S/N ratio are meant to be
    exported. The selection itself is delegated to
    ``PipelineMotifs.writeSequencesForIntervals``, which is configured
    from the following parameters:

    1. *motifs_proportion*: the top proportion of intervals to keep; the
       ordering is controlled by *motifs_score*.
    2. *motifs_halfwidth*: only a region +/- this width around the peak.
    3. *motifs_min_sequences*: at least this many sequences. If there are
       not enough sequences to start with, all will be used.
    4. *motifs_max_size*: at most this many sequences will be output.

    *motifs_masker* (converted to a list) and *motifs_num_sequences* are
    passed through as well.

    If no sequence was written, a warning is issued and *outfile* is
    touched.
    '''
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()

    # The snapshot of locals() is taken exactly here: it hands infile,
    # outfile, track and dbhandle to the parameter substitution, so none
    # of these names may be renamed or moved below this line.
    params = P.substituteParameters(**locals())

    # Collected in the same order in which the original call evaluated
    # its keyword arguments.
    selection = dict(
        full=False,
        masker=P.asList(params['motifs_masker']),
        halfwidth=int(params["motifs_halfwidth"]),
        maxsize=int(params["motifs_max_size"]),
        proportion=params["motifs_proportion"],
        min_sequences=params["motifs_min_sequences"],
        num_sequences=params["motifs_num_sequences"],
        order=params['motifs_score'],
    )

    written = PipelineMotifs.writeSequencesForIntervals(
        track, outfile, dbhandle, **selection)

    if written != 0:
        return

    # Nothing was exported: leave an empty output file behind.
    E.warn("%s: no sequences - meme skipped" % outfile)
    P.touch(outfile)
def run_mapping(infile, outfile):
    '''Map reads with the read mapper selected by ``PARAMS["mapper"]``.

    Supported mappers are ``"star"`` and ``"bowtie"``. For either one the
    thread and memory settings are read from ``PARAMS``, a mapper object
    is created, its ``build`` method produces the statement for
    ``(infile,)`` and *outfile*, and ``P.run()`` is called.

    Parameters
    ----------
    infile : str
        Reads file, passed to the mapper as a one-element tuple.
    outfile : str
        Output file passed to the mapper.

    Raises
    ------
    ValueError
        If ``PARAMS["mapper"]`` is neither ``"star"`` nor ``"bowtie"``.
        Previously this case failed later with an unbound local ``m``.
    '''
    # NOTE(review): job_threads, job_memory, star_mapping_genome, genome,
    # reffile and statement are never read again in this function;
    # presumably P.run() picks them up from this frame - confirm before
    # renaming or removing any of them.
    if PARAMS["mapper"] == "star":
        job_threads = PARAMS["star_threads"]
        job_memory = PARAMS["star_memory"]
        # Fall back to the generic genome if no STAR-specific one is set.
        star_mapping_genome = PARAMS["star_genome"] or PARAMS["genome"]
        m = PipelineMapping.STAR(
            executable=P.substituteParameters(**locals())["star_executable"],
            strip_sequence=0)
    elif PARAMS["mapper"] == "bowtie":
        job_threads = PARAMS["bowtie_threads"]
        job_memory = PARAMS["bowtie_memory"]
        m = PipelineMapping.Bowtie(
            executable="bowtie",
            tool_options=PARAMS["bowtie_options"],
            strip_sequence=0)
        genome = PARAMS["bowtie_genome"]
        reffile = os.path.join(PARAMS["bowtie_index_dir"],
                               PARAMS["bowtie_genome"] + ".fa")
    else:
        # Fail early with a message naming the bad configuration value
        # instead of an UnboundLocalError on ``m`` further down.
        raise ValueError(
            "unknown mapper %r: expected 'star' or 'bowtie'"
            % (PARAMS["mapper"],))

    statement = m.build((infile, ), outfile)
    P.run()
def exportMotifDiscoverySequences(infile, outfile):
    '''Write the sequences used for motif discovery for one interval track.

    The track name is *infile* without its ``_intervals.load`` suffix, so
    the matching ``_intervals`` table has to be available in the database.

    Only the sequences with the highest S/N ratio are meant to be
    exported. The selection itself is delegated to
    ``PipelineMotifs.writeSequencesForIntervals``, which is configured
    from the following parameters:

    1. *motifs_proportion*: the top proportion of intervals to keep; the
       ordering is controlled by *motifs_score*.
    2. *motifs_halfwidth*: only a region +/- this width around the peak.
    3. *motifs_min_sequences*: at least this many sequences. If there are
       not enough sequences to start with, all will be used.
    4. *motifs_max_size*: at most this many sequences will be output.

    *motifs_masker* (converted to a list) and *motifs_num_sequences* are
    passed through as well.

    If no sequence was written, a warning is issued and *outfile* is
    touched.
    '''
    track = P.snip(infile, "_intervals.load")
    dbhandle = connect()

    # The snapshot of locals() is taken exactly here: it hands infile,
    # outfile, track and dbhandle to the parameter substitution, so none
    # of these names may be renamed or moved below this line.
    params = P.substituteParameters(**locals())

    # Collected in the same order in which the original call evaluated
    # its keyword arguments.
    selection = dict(
        full=False,
        masker=P.asList(params['motifs_masker']),
        halfwidth=int(params["motifs_halfwidth"]),
        maxsize=int(params["motifs_max_size"]),
        proportion=params["motifs_proportion"],
        min_sequences=params["motifs_min_sequences"],
        num_sequences=params["motifs_num_sequences"],
        order=params['motifs_score'],
    )

    written = PipelineMotifs.writeSequencesForIntervals(
        track, outfile, dbhandle, **selection)

    if written != 0:
        return

    # Nothing was exported: leave an empty output file behind.
    E.warn("%s: no sequences - meme skipped" % outfile)
    P.touch(outfile)
def mapReadsWithHisat(infiles, outfile):
    '''Map a subset of reads with Hisat (spliced reads).

    Parameters
    ----------
    infiles: list
        exactly two entries, unpacked as ``(reads, junctions)`` -
        infiles[0]: str
            name carrying the suffix ``.subset``. The suffix is removed
            and ``.fastq.gz`` appended to obtain the reads file; if that
            file does not exist, ``.fastq.1.gz`` is used in its place.
        infiles[1]: str
            filename with suffix .junctions containing a list of known
            splice junctions.
    hisat_threads: int
        :term:`PARAMS`
        number of threads with which to run hisat
    hisat_memory: str
        :term:`PARAMS`
        memory required for hisat job
    hisat_executable: str
        :term:`PARAMS`
        path to hisat executable
    hisat_library_type: str
        :term:`PARAMS`
        hisat rna-strandness parameter, see
        https://ccb.jhu.edu/software/hisat/manual.shtml#command-line
    hisat_options: str
        options string for hisat, see
        https://ccb.jhu.edu/software/hisat/manual.shtml#command-line
    hisat_index_dir: str
        path to directory containing hisat indices
    strip_sequence: bool
        :term:`PARAMS`
        if set, strip read sequence and quality information
    outfile: str
        :term:`bam` filename to write the mapped reads in bam format.

    .. note::
        ``hisat_library_type``, ``hisat_options`` and ``hisat_index_dir``
        are not read in this function; presumably they are consumed when
        the mapper statement is built and run - confirm.

    .. note::
        If hisat fails with an error such as::

            Error: segment-based junction search failed with err =-6
            what(): std::bad_alloc

        it means that it ran out of memory.
    '''
    # NOTE(review): job_threads, job_memory, junctions and statement are
    # never read again in this function; presumably P.run() picks them up
    # from this frame - confirm before renaming or removing any of them.
    job_threads = PARAMS["hisat_threads"]
    job_memory = PARAMS["hisat_memory"]

    # locals() is evaluated here, i.e. before infile and junctions exist.
    m = PipelineMapping.Hisat(
        executable=P.substituteParameters(**locals())["hisat_executable"],
        strip_sequence=PARAMS["strip_sequence"])

    infile, junctions = infiles

    # Resolve the reads file: <name>.fastq.gz if present on disk,
    # otherwise <name>.fastq.1.gz.
    infile = P.snip(infile, ".subset") + ".fastq.gz"
    infile = (
        infile
        if os.path.exists(infile)
        else P.snip(infile, ".fastq.gz") + ".fastq.1.gz")

    statement = m.build((infile,), outfile)
    P.run()