Example #1
0
    def __init__(self, fasta_path, parent, database='silvamod', base_dir=None):
        """Set up the taxonomy object for the sequences at `fasta_path`.

        `parent` is stored under both `self.otu` and `self.parent` and
        supplies the samples. `database` is used as a key into `databases`.
        When `base_dir` is None the parent's `p.crest_dir` is used instead.
        """
        # The owning object, reachable under two names #
        self.otu = self.parent = parent
        # Samples are taken over from the parent #
        self.samples = self.parent.samples
        # Input sequences #
        self.fasta = FASTA(fasta_path)
        # Database name and where it is found #
        self.database = database
        self.database_path = databases[database]
        # Output directory and its automatic paths #
        self.base_dir = self.parent.p.crest_dir if base_dir is None else base_dir
        self.p = AutoPaths(self.base_dir, self.all_paths)
        # One graph for every class exported by the plots module #
        graphs = []
        for cls_name in plots.__all__:
            plot_cls = getattr(plots, cls_name)
            graphs.append(plot_cls(self))
        self.graphs = graphs
        # Tab-delimited OTU tables #
        self.otu_csv = CSVTable(self.p.otu_csv, d='\t')
        self.otu_csv_norm = CSVTable(self.p.otu_csv_norm, d='\t')
        # Filtered centers file #
        self.centers = FASTA(self.p.centers)
        # Composition tables #
        self.comp_phyla = CompositionPhyla(self, self.p.comp_phyla)
        self.comp_tips = CompositionTips(self, self.p.comp_tips)
        self.comp_order = CompositionOrder(self, self.p.comp_order)
        self.comp_class = CompositionClass(self, self.p.comp_class)
        # Statistics #
        self.stats = StatsOnTaxonomy(self)
Example #2
0
 def __init__(self, samples, name, base_dir=None):
     """Set up a cluster called `name` over the given `samples`.

     The samples are stored as both `self.samples` and `self.children`.
     Every distinct pool referenced by a sample has its `load()` called.
     When `base_dir` is falsy the directory defaults to
     `illumitag.view_dir` + "clusters/" + the name + '/'.
     """
     # Identity and members #
     self.name = name
     self.samples = self.children = samples
     # Short names of the used samples may not repeat #
     used_names = [sample.short_name for sample in samples if sample.used]
     assert len(set(used_names)) == len(used_names)
     # Distinct pools, ordered by their id_name #
     distinct_pools = set(sample.pool for sample in self.samples)
     self.pools = sorted(distinct_pools, key=lambda pool: pool.id_name)
     # Load every pool #
     for pool in self.pools:
         pool.load()
     # Output directory and its automatic paths #
     if not base_dir:
         base_dir = illumitag.view_dir + "clusters/" + self.name + '/'
     self.base_dir = base_dir
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Runner #
     self.runner = ClusterRunner(self)
     # All reads in one FASTA #
     self.reads = FASTA(self.p.all_reads_fasta)
     # The three OTU picking methods #
     self.otu_uparse = UparseOTUs(self)
     self.otu_uclust = UclustOTUs(self)
     self.otu_cdhit = CdhitOTUs(self)
     # Reporting #
     self.reporter = ClusterReporter(self)
Example #3
0
 def __init__(self, parent):
     """Attach to `parent` and create the NMDS child on its taxa table.

     `parent` is stored as both `self.composition` and `self.parent`.
     """
     # The owning object, reachable under two names #
     self.composition = self.parent = parent
     # Automatic paths rooted in the parent's stats directory #
     stats_dir = self.parent.p.stats_dir
     self.p = AutoPaths(stats_dir, self.all_paths)
     # NMDS built from the parent's taxa table #
     self.nmds = NMDS(self, self.parent.taxa_csv)
Example #4
0
 def __init__(self, fasta, base_dir):
     """Store the input sequences and create a simple composition.

     `fasta` may be a FASTA object or anything accepted by `FASTA(...)`.
     `base_dir` is used with string concatenation below.
     `self.short_name` is read but not assigned here.
     """
     # Wrap anything that is not already a FASTA object #
     if not isinstance(fasta, FASTA):
         fasta = FASTA(fasta)
     self.fasta = fasta
     # Directory and automatic paths #
     self.base_dir = base_dir
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Imported here rather than at module level #
     from illumitag.clustering.composition import SimpleComposition
     # The composition lives in its own 'comp_<short_name>/' subdirectory #
     comp_dir = self.base_dir + 'comp_' + self.short_name + '/'
     self.composition = SimpleComposition(self, comp_dir)
Example #5
0
 def __init__(self, name, pools, out_dir):
     """Record `name` and `pools`; paths are rooted at `out_dir` + name + '/'.

     The object starts with `loaded` set to False.
     """
     # Plain attributes #
     self.name = name
     self.pools = pools
     self.loaded = False
     # Directory named after this object, then its automatic paths #
     directory = out_dir + self.name + '/'
     self.base_dir = directory
     self.p = AutoPaths(self.base_dir, self.all_paths)
Example #6
0
 def __init__(self, parent, base_dir):
     """Attach to `parent`, set up paths under `base_dir` and one bar-stack graph.

     `parent` is stored as both `self.parent` and `self.taxonomy`.
     """
     # The owning object, reachable under two names #
     self.parent = self.taxonomy = parent
     # Directory and automatic paths #
     self.base_dir = base_dir
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # A single graph with an adjusted bottom margin and legend anchor #
     self.graph = illumitag.clustering.composition.plots.TaxaBarstack(self)
     graph = self.graph
     graph.bottom = 0.40
     graph.legend_anchor = -0.3
     # Note: `formats` is set on this object, not on the graph #
     self.formats = ('pdf',)
Example #7
0
 def __init__(self, name, pools, projs_dir):
     """Record `name` and `pools`; paths are rooted at `projs_dir` + name + '/'.

     `pools` is stored as both `self.pools` and `self.children`. The metadata
     path points to `illumitag.repos_dir` + 'projects/' + name + '.csv'.
     """
     # Plain attributes #
     self.name = name
     self.pools = self.children = pools
     self.loaded = False
     # Directory named after this object, then its automatic paths #
     directory = projs_dir + self.name + '/'
     self.base_dir = directory
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Location of the CSV file with the metadata #
     csv_name = self.name + '.csv'
     self.meta_data_path = illumitag.repos_dir + 'projects/' + csv_name
Example #8
0
 def __init__(self, parent, csv, calc_distance=True):
     """Attach to `parent`, remember the `csv` input and create the graph.

     `parent` is stored as both `self.stat` and `self.parent`.
     `calc_distance` is only stored here; this method does not act on it.
     """
     # The owning object, reachable under two names #
     self.stat = self.parent = parent
     # Input table and option #
     self.csv = csv
     self.calc_distance = calc_distance
     # Automatic paths rooted in the parent's nmds directory #
     self.base_dir = self.parent.p.nmds_dir
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # The graph shares the same directory #
     self.graph = GraphNMDS(self, base_dir=self.base_dir)
Example #9
0
 def __init__(self, num, pools, out_dir):
     """Record the run number and its pools; the name is "run" + number.

     `pools` is stored as both `self.pools` and `self.children`. Paths are
     rooted at `out_dir` + name + '/'.
     """
     # Number and the name derived from it #
     self.num = num
     self.name = "run%i" % num
     # Members #
     self.pools = self.children = pools
     self.loaded = False
     # Directory named after the run, then its automatic paths #
     directory = out_dir + self.name + '/'
     self.base_dir = directory
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Illumina report. `self.directory` is not assigned in this method, #
     # presumably the class provides it -- verify #
     self.xml_report_path = self.directory + "report.xml"
Example #10
0
 def __init__(self, parent):
     """Attach to `parent` and create the three size fractions.

     `parent` is stored as both `self.parent` and `self.pool`. The fractions
     are built with the bounds 430-446, 447-464 and 465-488.
     """
     # The owning object, reachable under two names #
     self.parent = self.pool = parent
     # Automatic paths rooted in the parent's fractions directory #
     self.base_dir = self.parent.p.fractions_dir
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Low, medium and big fractions, each in its own directory #
     self.low = Fraction(self, self.p.low_dir, 430, 446)
     self.med = Fraction(self, self.p.med_dir, 447, 464)
     self.big = Fraction(self, self.p.big_dir, 465, 488)
     # All of them as a list #
     children = []
     children.append(self.low)
     children.append(self.med)
     children.append(self.big)
     self.children = children
Example #11
0
 def __init__(self, parent):
     """Attach to `parent` and create the statistics children.

     `parent` is stored as both `self.tax` and `self.parent`.
     """
     # The owning object, reachable under two names #
     self.tax = self.parent = parent
     # Automatic paths rooted in the parent's stats directory #
     stats_dir = self.parent.p.stats_dir
     self.p = AutoPaths(stats_dir, self.all_paths)
     # NMDS is built from the parent's OTU table #
     self.nmds = NMDS(self, self.parent.otu_csv)
     # The remaining children only receive this object #
     self.permanova = PERMANOVA(self)
     self.betadis = BetaDispersion(self)
     self.unifrac = Unifrac(self)
Example #12
0
 def __init__(self, parent, base_dir=None):
     """Attach to `parent` and set up the abundances table.

     `parent` is stored as both `self.otu` and `self.parent`; its taxonomy
     and samples are copied over. When `base_dir` is None the parent's
     `p.seqenv` is used instead.
     """
     # The owning object, reachable under two names #
     self.otu = self.parent = parent
     # Taken over from the parent #
     self.taxonomy = self.parent.taxonomy
     self.samples = self.parent.samples
     # Output directory and its automatic paths #
     self.base_dir = self.parent.p.seqenv if base_dir is None else base_dir
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Abundances table #
     self.abundances = CSVTable(self.p.abundances)
Example #13
0
 def load(self):
     """Set up paths and file objects; returns early for dummy samples.

     A sample counts as a dummy when `self.info.get('dummy')` is truthy; in
     that case nothing is assigned.
     """
     # Dummy samples get no paths and no files #
     if self.info.get('dummy'):
         return
     # Directory inside the pool, named after the barcode name #
     self.base_dir = self.pool.p.samples_dir + self.bar_name + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     self.path = str(self.p.orig_fastq)
     # Trim distances are copied from the pool's samples #
     self.trim_fwd = self.pool.samples.trim_fwd
     self.trim_rev = self.pool.samples.trim_rev
     # File objects #
     self.trimmed = FASTQ(self.p.trimmed)
     self.renamed = FASTQ(self.p.renamed)
     self.fasta = FASTA(self.p.reads_fasta)
Example #14
0
 def __init__(self, fasta_path, base_dir, parent, verbose=False):
     """Store the inputs and create the file objects under `base_dir`.

     `fasta_path` is wrapped in a FASTA object; `parent` and `verbose` are
     only stored here.
     """
     # Inputs #
     self.fasta = FASTA(fasta_path)
     self.parent = parent
     self.verbose = verbose
     # Directory and automatic paths #
     self.base_dir = base_dir
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Files that carry sizes #
     self.derep_cluster = SizesFASTA(self.p.derep_cluster)
     self.cluster_99 = SizesFASTA(self.p.cluster_99)
     self.positive = SizesFASTA(self.p.positive)
     self.negative = SizesFASTA(self.p.negative)
     # Plain FASTA for the subsampled sequences #
     self.subsampled = FASTA(self.p.subsampled)
Example #15
0
 def __init__(self, json_path, out_dir):
     """Set up a sample from the JSON description at `json_path`.

     The parsed JSON is kept in `self.info`. Output files live under
     `out_dir` + `id_name` + '/'. The object acts as its own pool and parent.

     Raises:
         Exception: if an entry of `info['files']` names a path that does
             not exist on disk.
         KeyError: if one of the keys looked up with `self.info[...]` is
             missing (assuming the JSON top level is an object).
     """
     # Attributes #
     self.out_dir = out_dir
     self.json_path = FilePath(json_path)
     # Parse #
     with open(json_path) as handle:
         self.info = json.load(handle)
     # Basic #
     self.run_num = self.info['run_num']
     self.project_short_name = self.info['project']
     self.project_long_name = self.info['project_name']
     # Own attributes #
     self.num = self.info['sample_num']
     self.short_name = self.info['sample']
     self.long_name = self.info['sample_name']
     self.name = 'run%i_sample%i' % (self.run_num, self.num)
     self.group = self.info['group']
     # Zero-padded variant of the name, used for the directory below #
     self.id_name = "run%03d-sample%02d" % (self.run_num, self.num)
     # SFF files: every listed path must exist #
     self.sff_files_info = self.info['files']
     for f in self.sff_files_info:
         if not os.path.exists(f['path']):
             raise Exception("No file at %s" % f['path'])
     # Automatic paths #
     self.base_dir = self.out_dir + self.id_name + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Make an alias to the json #
     self.p.info_json.link_from(self.json_path, safe=True)
     # Pool dummy: this object is its own pool and parent #
     self.pool, self.parent = self, self
     # Other dummy variables #
     self.bar_len = 0
     self.gziped = False
     self.used = True
     # Primer: the JSON value is compiled as a regular expression #
     self.primer_regex = re.compile(self.info['primer'])
     # Raw files #
     self.raw_fasta = FASTA(self.p.raw_fasta)
     self.raw_fastq = FASTQ(self.p.raw_fastq)
     # Standard FASTA #
     self.reads = FASTA(self.p.reads_fasta)
     self.fasta = FASTA(self.p.renamed)
     # Special FASTQ #
     self.fastq = FASTQ(self.p.reads_fastq)
     # A shameless hack for cdhit to work #
     self.renamed = self.fastq
     # Pre-denoised special case #
     # NOTE: `and False` makes this condition always falsy, so the two lines
     # below never run. The 'predenoised' key is still looked up first and
     # must therefore be present in the JSON.
     if self.info['predenoised'] and False:
         self.sff_files_info = []
         self.reads.link_from(self.info['predenoised'], safe=True)
Example #16
0
 def __init__(self, parent, base_dir=None):
     """Attach to `parent` and set up graphs, the taxa table and statistics.

     `parent` is stored as both `self.taxonomy` and `self.parent` and
     supplies the samples. When `base_dir` is None the parent's
     `p.composition_dir` is used instead.
     """
     # The owning object, reachable under two names #
     self.taxonomy = self.parent = parent
     # Samples are taken over from the parent #
     self.samples = self.parent.samples
     # Output directory and its automatic paths #
     self.base_dir = self.parent.p.composition_dir if base_dir is None else base_dir
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # One graph for every class exported by the plots module #
     graphs = []
     for cls_name in plots.__all__:
         plot_cls = getattr(plots, cls_name)
         graphs.append(plot_cls(self))
     self.graphs = graphs
     # Taxa table #
     self.taxa_csv = CSVTable(self.p.taxa_csv)
     # Statistics #
     self.stats = StatsOnComposition(self)
Example #17
0
 def __init__(self, parent):
     """Attach to `parent` and create the alignment, tree and distance files.

     `parent` is stored as both `self.stat` and `self.parent`; its `tax`
     attribute is copied over. The NMDS child is created with
     `calc_distance=False` on the distances table.
     """
     # The owning object, reachable under two names #
     self.stat = self.parent = parent
     self.tax = parent.tax
     # Automatic paths rooted in the parent's unifrac directory #
     unifrac_dir = self.parent.p.unifrac_dir
     self.p = AutoPaths(unifrac_dir, self.all_paths)
     # Alignments #
     self.clustalo_aligned = FASTA(self.p.clustalo_align)
     self.pynast_aligned = FASTA(self.p.pynast_align)
     self.mothur_aligned = FASTA(self.p.mothur_align)
     # Trees #
     self.raxml_tree = FilePath(self.p.raxml_tree)
     self.fasttree_tree = FilePath(self.p.fasttree_tree)
     # Distances table #
     self.distances_csv = CSVTable(self.p.distances_csv)
     # NMDS directly on the distances table #
     self.nmds = NMDS(self, self.distances_csv, calc_distance=False)
Example #18
0
 def __init__(self, parent, base_dir, lower_bound, upper_bound):
     """Attach to `parent` and set up one fraction under `base_dir`.

     `parent` is stored as both `self.parent` and `self.fractions`. The two
     bounds are only stored here.
     """
     # The owning object, reachable under two names #
     self.parent = self.fractions = parent
     # Directory and automatic paths #
     self.base_dir = base_dir
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Bounds #
     self.lower_bound, self.upper_bound = lower_bound, upper_bound
     # Reads of this fraction #
     self.reads = FASTA(self.p.reads_fasta)
     # Both Uchime objects receive `self.p.reads` and their own directory #
     self.refere = UchimeRef(self.p.reads, self.p.refere_dir, self)
     self.denovo = UchimeDenovo(self.p.reads, self.p.denovo_dir, self)
     # Classification of the reads #
     self.rdp = SimpleRdpTaxonomy(self.reads, self.p.rdp_dir)
     self.crest = SimpleCrestTaxonomy(self.reads, self.p.crest_dir)
Example #19
0
 def __init__(self, parent):
     """Attach to `parent`, set up paths and read files, then call `load()`.

     `parent` is stored as both `self.parent` and `self.assemble_group`; its
     samples and pool are copied over. `self.short_name` is read but not
     assigned here.
     """
     # Save parent #
     self.parent, self.assemble_group = parent, parent
     self.samples = parent.samples
     self.pool = self.parent.pool
     # Auto paths #
     self.base_dir = parent.p.groups_dir + self.short_name + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # More: the file class is whatever the parent exposes as `cls` #
     self.orig_reads = self.parent.cls(self.p.orig_fastq, samples=self.samples)
     self.n_filtered = self.parent.cls(self.p.n_filtered, samples=self.samples)
     # Quality filtered #
     # NOTE(review): `self.parent` is the object passed in, yet it is compared
     # to a string. Unless the parent's class defines equality with strings
     # this branch never runs -- confirm whether a name attribute was meant.
     if self.parent == 'assembled':
         self.qual_filtered = FASTQ(self.p.qual_filtered, samples=self.samples)
         self.len_filtered = FASTQ(self.p.len_filtered_fastq, samples=self.samples)
         self.trimmed_barcodes = FASTA(self.p.trimmed_barcodes)
     # Further #
     self.load()
Example #20
0
 def __init__(self, cluster):
     """Attach to `cluster` and set up OTU files and two taxonomies.

     `cluster` is stored as both `self.cluster` and `self.parent` and
     supplies the samples and the reads. `self.short_name` is read but not
     assigned here. The 'silvamod' taxonomy becomes `self.taxonomy`.
     """
     # The owning object, reachable under two names #
     self.cluster = self.parent = cluster
     # Samples are taken over from the parent #
     self.samples = self.parent.samples
     # Directory inside the parent's OTU directory #
     self.base_dir = self.parent.p.otus_dir + self.short_name + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # The reads are shared with the parent #
     self.reads = self.parent.reads
     # Files resolved through the automatic paths #
     self.all_otus = FilePath(self.p.all_otus)
     self.all_centers = FASTA(self.p.all_centers)
     # Files addressed directly by name inside the directory #
     otus_path = self.base_dir + "otus.txt"
     centers_path = self.base_dir + "centers.fasta"
     self.otus = FilePath(otus_path)
     self.centers = FASTA(centers_path)
     # Taxonomies on the centers #
     self.taxonomy_silva = CrestTaxonomy(
         self.centers, self, 'silvamod', self.p.silva)
     self.taxonomy_fw = CrestTaxonomy(
         self.centers, self, 'freshwater', self.p.fw_dir)
     # Preferred one #
     self.taxonomy = self.taxonomy_silva
Example #21
0
 def __init__(self, path, parent):
     """Attach to `parent` and set up the read files derived from `path`.

     `parent` is stored as both `self.parent` and `self.pool` and supplies
     the samples. `path` becomes the untrimmed FASTQ.
     """
     # The owning object, reachable under two names #
     self.parent = self.pool = parent
     self.samples = parent.samples
     # Automatic paths rooted in the parent's quality directory #
     self.base_dir = parent.p.quality_dir + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Input reads and the two derived files #
     self.untrimmed = FASTQ(path, samples=self.samples)
     self.only_used = FASTA(self.p.only_used, samples=self.samples)
     self.trimmed = FASTA(self.p.trimmed)
     # Qiime output #
     self.qiime_fasta = FASTA(self.p.qiime_fasta)
     # Mothur output #
     self.mothur_fasta = FASTA(self.p.mothur_fasta)
     self.mothur_qual = QualFile(self.p.mothur_qual)
     self.mothur_groups = GroupFile(self.p.mothur_groups)
     # Trim distances are copied from the pool's samples #
     self.trim_fwd = self.pool.samples.trim_fwd
     self.trim_rev = self.pool.samples.trim_rev
Example #22
0
 def __init__(self, parent):
     """Attach to `parent` and set up read paths, assembly children and graphs.

     `parent` is stored as both `self.parent` and `self.pool` and supplies
     the samples. `self.short_name` is read but not assigned here.
     """
     # The owning object, reachable under two names #
     self.parent = self.pool = parent
     self.samples = parent.samples
     # Directory inside the pool's groups directory #
     self.base_dir = self.pool.p.groups_dir + self.short_name + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Forward and reverse read paths #
     self.fwd_path = self.p.fwd_fastq
     self.rev_path = self.p.rev_fastq
     # Flag set from the forward path ending in 'gz' #
     if self.fwd_path.endswith('gz'):
         self.gziped = True
     else:
         self.gziped = False
     # Assembly files, also exposed as children #
     self.assembled = Assembled(self)
     self.unassembled = Unassembled(self)
     self.children = (self.assembled, self.unassembled)
     self.first = self.assembled
     # One graph for every class exported by outcome_plots #
     graphs = []
     for cls_name in outcome_plots.__all__:
         plot_cls = getattr(outcome_plots, cls_name)
         graphs.append(plot_cls(self))
     self.graphs = graphs
Example #23
0
 def __init__(self, cluster):
     """Attach to `cluster` and set up the cd-hit files and two taxonomies.

     `cluster` is stored as both `self.cluster` and `self.parent` and
     supplies the samples. `self.short_name` is read but not assigned here.
     The 'silvamod' taxonomy becomes `self.taxonomy`.
     """
     # The owning object, reachable under two names #
     self.cluster = self.parent = cluster
     # Samples are taken over from the parent #
     self.samples = self.parent.samples
     # Directory inside the parent's OTU directory #
     self.base_dir = self.parent.p.otus_dir + self.short_name + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # The main reads file is a FASTQ here #
     self.reads = FASTQ(self.p.all_reads)
     # Cluster listing and the centers #
     self.cdhit_clusters = FilePath(self.p.clstr)
     otu_path = self.p.clusters_dir + "OTU"
     self.cdhit_centers = FASTA(otu_path)
     self.centers = FASTA(self.p.centers)
     # Taxonomies on the centers #
     self.taxonomy_silva = CrestTaxonomy(
         self.centers, self, 'silvamod', self.p.silva)
     self.taxonomy_fw = CrestTaxonomy(
         self.centers, self, 'freshwater', self.p.fw_dir)
     # Preferred one #
     self.taxonomy = self.taxonomy_silva
Example #24
0
 def __init__(self, parent):
     """Attach to `parent`, call `load()` and create the five primer outcomes.

     `parent` is stored as both `self.parent` and `self.outcome`. The pool is
     the outcome's parent and supplies the samples and primers. `load()` is
     called before the automatic paths are built from `self.base_dir`.
     """
     # The owning object, reachable under two names #
     self.parent = self.outcome = parent
     self.samples = parent.samples
     # The pool sits one level above; its samples replace the ones above #
     self.pool = self.outcome.parent
     self.samples = self.pool.samples
     self.primers = self.pool.primers
     # Delayed part of the setup #
     self.load()
     # Automatic paths #
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # One child per primer outcome #
     self.good_primers = GoodPrimers(self)
     self.wrong_primers = WrongPrimers(self)
     self.only_fwd_primers = OnlyFwdPrimers(self)
     self.only_rev_primers = OnlyRevPrimers(self)
     self.no_primers = NoPrimers(self)
     # All of them as a tuple, good primers first #
     self.children = (
         self.good_primers,
         self.wrong_primers,
         self.only_fwd_primers,
         self.only_rev_primers,
         self.no_primers,
     )
     self.first = self.good_primers
Example #25
0
 def __init__(self, fasta_path, parent, base_dir=None):
     """Set up the taxonomy object for the sequences at `fasta_path`.

     `parent` is stored as both `self.otu` and `self.parent` and supplies
     the samples. When `base_dir` is None the parent's `p.rdp_dir` is used
     instead.
     """
     # The owning object, reachable under two names #
     self.otu = self.parent = parent
     # Samples are taken over from the parent #
     self.samples = self.parent.samples
     # Input sequences #
     self.fasta = FASTA(fasta_path)
     # Output directory and its automatic paths #
     self.base_dir = self.parent.p.rdp_dir if base_dir is None else base_dir
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # One graph for every class exported by the plots module #
     graphs = []
     for cls_name in plots.__all__:
         plot_cls = getattr(plots, cls_name)
         graphs.append(plot_cls(self))
     self.graphs = graphs
     # OTU table #
     self.otu_csv = CSVTable(self.p.otu_csv)
     # Composition tables are currently switched off #
     #self.comp_phyla = CompositionPhyla(self, self.p.comp_phyla)
     #self.comp_tips = CompositionTips(self, self.p.comp_tips)
     # Statistics #
     self.stats = StatsOnTaxonomy(self)
Example #26
0
 def __init__(self, cluster):
     """Attach to `cluster` and set up OTU files, three taxonomies and seqenv.

     `cluster` is stored as both `self.cluster` and `self.parent` and
     supplies the samples and the reads. `self.short_name` is read but not
     assigned here. The 'silvamod' taxonomy becomes `self.taxonomy`.
     """
     # The owning object, reachable under two names #
     self.cluster = self.parent = cluster
     # Samples are taken over from the parent #
     self.samples = self.parent.samples
     # Directory inside the parent's OTU directory #
     self.base_dir = self.parent.p.otus_dir + self.short_name + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # The reads are shared with the parent #
     self.reads = self.parent.reads
     # Files that carry sizes #
     self.derep = SizesFASTA(self.p.derep)
     self.sorted = SizesFASTA(self.p.sorted)
     # Centers and the read map #
     self.centers = FASTA(self.p.centers)
     self.readmap = UClusterFile(self.p.readmap)
     # Taxonomies on the centers #
     self.taxonomy_silva = CrestTaxonomy(
         self.centers, self, 'silvamod', self.p.silva_dir)
     self.taxonomy_fw = CrestTaxonomy(
         self.centers, self, 'freshwater', self.p.fw_dir)
     self.taxonomy_rpd = RdpTaxonomy(self.centers, self)
     # Preferred one #
     self.taxonomy = self.taxonomy_silva
     # Source tracking #
     self.seqenv = Seqenv(self)
Example #27
0
 def load(self):
     """A second __init__ that is delayed, solves some circular references.

     Sets up the automatic paths, loads the samples, creates the raw read
     files, the barcode outcomes, the quality reads, the fractions, the
     runner and the graphs, then marks the object as loaded.

     Returns:
         This object itself, for convenience.
     """
     # Automatic paths #
     self.base_dir = self.out_dir + self.id_name + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Make an alias to the json #
     self.p.info_json.link_from(self.json_path, safe=True)
     # Children #
     self.samples.load()
     # Raw file pairs: located under `home`, by run label, own label and file name #
     self.fwd_path = home + "ILLUMITAG/INBOX/%s/%s/%s" % (self.run.label, self.label, self.info['forward_reads'])
     self.rev_path = home + "ILLUMITAG/INBOX/%s/%s/%s" % (self.run.label, self.label, self.info['reverse_reads'])
     self.fwd = FASTQ(self.fwd_path)
     self.rev = FASTQ(self.rev_path)
     self.fastq = PairedFASTQ(self.fwd.path, self.rev.path, self)
     # Barcode length #
     self.bar_len = self.samples.bar_len
     # Make Outcomes #
     self.no_barcodes   = NoBarcode(self)
     self.one_barcodes  = OneBarcode(self)
     self.same_barcodes = SameBarcode(self)
     self.bad_barcodes  = BadBarcode(self)
     self.good_barcodes = GoodBarcode(self)
     # Good barcodes come first in the tuple, the rest follow #
     self.outcomes = (self.good_barcodes, self.no_barcodes, self.one_barcodes, self.same_barcodes, self.bad_barcodes)
     self.children = self.outcomes
     # The good reads: good barcodes, assembled, good primers, length filtered #
     self.quality_reads = QualityReads(self.good_barcodes.assembled.good_primers.len_filtered, self)
     self.fractions = Fractions(self)
     # Runner #
     self.runner = PoolRunner(self)
     # Graphs: one instance per class exported by pool_plots #
     self.graphs = [getattr(pool_plots, cls_name)(self) for cls_name in pool_plots.__all__]
     # Loaded #
     self.loaded = True
     # Return self for convenience #
     return self
Example #28
0
 def __init__(self, json_path, out_dir):
     """Set up a sample from the JSON description at `json_path`.

     The parsed JSON is kept in `self.info`. Output files live under
     `out_dir` + `id_name` + '/'. The object acts as its own pool and parent
     and carries one dummy entry in `info['samples']`.

     Raises:
         KeyError: if one of the keys looked up with `self.info[...]` is
             missing (assuming the JSON top level is an object). The
             'reverse_mid' key is optional.
     """
     # Attributes #
     self.out_dir = out_dir
     self.json_path = FilePath(json_path)
     # Parse #
     with open(json_path) as handle:
         self.info = json.load(handle)
     # Basic #
     self.account = self.info['uppmax_id']
     self.run_num = self.info['run_num']
     self.run_label = self.info['run_id']
     self.project_short_name = self.info['project']
     self.project_long_name = self.info['project_name']
     self.fwd_name = self.info['forward_reads']
     self.rev_name = self.info['reverse_reads']
     # Own attributes #
     self.num = self.info['sample_num']
     self.label = self.info['sample_id']
     self.short_name = self.info['sample']
     self.long_name = self.info['sample_name']
     self.name = 'run%i_sample%i' % (self.run_num, self.num)
     self.group = self.info['group']
     self.id_name = "run%03d-sample%02d" % (self.run_num, self.num)
     self.fwd_mid = self.info['forward_mid']
     # The reverse MID used to be read from 'forward_mid' (copy-paste slip).
     # Fall back to the forward value when the JSON has no 'reverse_mid', so
     # description files without that key load exactly as before.
     self.rev_mid = self.info.get('reverse_mid', self.fwd_mid)
     # Automatic paths #
     self.base_dir = self.out_dir + self.id_name + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Special #
     self.primers = TwoPrimers(self)
     # Samples dummy: a single entry describing this very sample #
     self.info['samples'] = [{
         "name": self.short_name,
         "used": 1,
         "group": self.group,
         "dummy": 1,
         "num": self.num,
         "fwd": "",
         "rev": ""
     }]
     self.samples = Samples(self)
     self.samples.load()
     # Pool dummy: this object is its own pool and parent #
     self.pool, self.parent = self, self
     # Raw read locations under `home`, by run label, own label and file name #
     self.fwd_path = home + "ILLUMITAG/INBOX/%s/%s/%s" % (
         self.run_label, self.label, self.fwd_name)
     self.rev_path = home + "ILLUMITAG/INBOX/%s/%s/%s" % (
         self.run_label, self.label, self.rev_name)
     self.gziped = True if self.fwd_path.endswith('gz') else False
     # Note: the FASTQ objects use the automatic paths, not the two above #
     self.fwd = FASTQ(self.p.fwd)
     self.rev = FASTQ(self.p.rev)
     self.fastq = PairedFASTQ(self.fwd.path, self.rev.path, self)
     # Barcode length #
     self.bar_len = 0
     # Make an alias to the json #
     self.p.info_json.link_from(self.json_path, safe=True)
     # Assembly files as children #
     self.assembled = Assembled(self)
     self.unassembled = Unassembled(self)
     self.children = (self.assembled, self.unassembled)
     self.first = self.assembled
     # Graphs: one instance per class exported by outcome_plots #
     self.graphs = [
         getattr(outcome_plots, cls_name)(self)
         for cls_name in outcome_plots.__all__
     ]
     # Runner #
     self.runner = PresampleRunner(self)
     # Final #
     self.trimmed = FASTQ(self.p.trimmed)
     self.renamed = FASTQ(self.p.renamed)
     self.fasta = FASTA(self.p.reads_fasta)
Example #29
0
 def __init__(self, parent):
     """Attach to `parent` and set up paths in its betadis directory.

     `parent` is stored as both `self.stat` and `self.parent`; its `tax`
     attribute is copied over.
     """
     # The owning object, reachable under two names #
     self.stat = self.parent = parent
     self.tax = parent.tax
     # Automatic paths rooted in the parent's betadis directory #
     betadis_dir = self.parent.p.betadis_dir
     self.p = AutoPaths(betadis_dir, self.all_paths)
Example #30
0
 def load(self):
     """Set the file class, the paths and the flipped reads file.

     The directory is the outcome's `p.assembled_dir`. `self.samples` and
     `self.primers` are read but not assigned here.
     """
     # File class used for the reads #
     self.cls = FASTQ
     # Automatic paths rooted in the outcome's assembled directory #
     assembled_dir = self.outcome.p.assembled_dir
     self.base_dir = assembled_dir
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Path of the original reads #
     self.path = self.p.orig_fastq
     # Flipped reads, given the samples and primers #
     flipped_path = self.p.flipped
     self.flipped_reads = FASTQ(flipped_path, self.samples, self.primers)