Ejemplo n.º 1
0
 def run(self, threshold=None):
     """Cluster the reads into OTUs with usearch7 and map the reads back.

     Steps, in order: dereplicate ``self.reads`` into ``self.derep``, sort
     the result by size into ``self.sorted`` with ``-minsize 2``, compute
     the OTU centers into ``self.centers``, rename the centers with
     ``rename_with_num('OTU-')``, then search the original reads against
     the centers and write the hits to ``self.readmap``.

     :param threshold: Value passed to ``-otu_radius_pct`` when computing
                       the centers. The ``-id`` cutoff of the final mapping
                       step is derived from it as ``(100 - threshold) / 100``.
                       When None, ``self.threshold`` is used.
     """
     # Optional threshold #
     if threshold is None:
         threshold = self.threshold
     # Divide by a float: under Python 2 an integer threshold would otherwise
     # trigger floor division and collapse the identity to 0 #
     identity = (100 - threshold) / 100.0
     # Dereplicate. usearch7's own "--derep_fulllength" is deliberately not
     # used here (the 32bit version runs out of memory) #
     sh.fasta_make_unique(self.reads, self.derep)
     # Order by size and kill singletons #
     sh.usearch7("--sortbysize", self.derep, '-output', self.sorted, '-minsize', 2)
     # Compute the centers #
     sh.usearch7("--cluster_otus", self.sorted, '-otus', self.centers, '-otu_radius_pct', threshold)
     # Rename the centers #
     self.centers.rename_with_num('OTU-')
     # Map the reads back to the centers #
     sh.usearch7("-usearch_global", self.reads, '-db', self.centers, '-strand', 'plus', '-id', identity, '-uc', self.readmap)
Ejemplo n.º 2
0
# Internal modules #
import illumitag
from plumbing.common import natural_sort
from plumbing.autopaths import AutoPaths, FilePath
from plumbing.cache import property_cached, LazyString
from fasta import FASTA, SizesFASTA
from illumitag.clustering.otu import OTUs
from illumitag.clustering.taxonomy.crest import CrestTaxonomy
from illumitag.clustering.taxonomy.rdp import RdpTaxonomy
from illumitag.clustering.source.seqenv_wrapper import Seqenv

# Third party modules #
import sh, pandas

# Constants #
def _read_usearch_version():
    """Run ``usearch7 -version`` and return its output without the first
    eight characters and without leading/trailing newlines."""
    # NOTE(review): the eight skipped characters are presumably the program
    # name prefix of the version banner -- confirm against real output #
    banner = sh.usearch7('-version').stdout
    without_prefix = banner[8:]
    return without_prefix.strip('\n')

# Wrapped in a LazyString so the callable is handed over instead of being
# called right here at import time #
uparse_version = LazyString(_read_usearch_version)

###############################################################################
class UparseOTUs(OTUs):
    """Will use uparse to create OTU clusters from a given FASTA file
    http://www.nature.com/doifinder/10.1038/nmeth.2604"""

    short_name = 'uparse'
    title = 'UPARSE denovo picking'
    article = "http://www.nature.com/doifinder/10.1038/nmeth.2604"
    version = uparse_version
    threshold = 3.0

    all_paths = """
    /derep.fasta
    /sorted.fasta