def __init__(self, fasta_path, parent, database='silvamod', base_dir=None):
    """Set up paths, tables, plots and statistics for one taxonomic assignment.

    :param fasta_path: Handed to `FASTA` to wrap the sequences being classified.
    :param parent: Owning object; its `samples` and (when `base_dir` is None)
                   its `p.crest_dir` are read here.
    :param database: Key looked up in the module-level `databases` mapping.
    :param base_dir: Output directory; falls back to `parent.p.crest_dir`.
    """
    # Parent (the same object is also exposed as `otu`) #
    self.otu = parent
    self.parent = parent
    # Inherited #
    self.samples = self.parent.samples
    # FASTA #
    self.fasta = FASTA(fasta_path)
    # The database to use #
    self.database = database
    self.database_path = databases[database]
    # Dir #
    self.base_dir = self.parent.p.crest_dir if base_dir is None else base_dir
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Graphs: one instance per name listed in `plots.__all__` #
    plot_classes = (getattr(plots, cls_name) for cls_name in plots.__all__)
    self.graphs = [plot_cls(self) for plot_cls in plot_classes]
    # OTU tables (both built with d='\t') #
    self.otu_csv = CSVTable(self.p.otu_csv, d='\t')
    self.otu_csv_norm = CSVTable(self.p.otu_csv_norm, d='\t')
    # Filtered centers file #
    self.centers = FASTA(self.p.centers)
    # Composition tables #
    self.comp_phyla = CompositionPhyla(self, self.p.comp_phyla)
    self.comp_tips = CompositionTips(self, self.p.comp_tips)
    self.comp_order = CompositionOrder(self, self.p.comp_order)
    self.comp_class = CompositionClass(self, self.p.comp_class)
    # Stats (built last, after the tables above exist) #
    self.stats = StatsOnTaxonomy(self)
def __init__(self, samples, name, base_dir=None):
    """Group a collection of samples under one name and prepare its outputs.

    :param samples: Iterable of sample objects; each is read for `short_name`,
                    `used` and `pool`. Also stored as `children`.
    :param name: Name of the cluster, used in the default directory.
    :param base_dir: Output directory; when falsy, a directory under
                     `illumitag.view_dir + "clusters/"` is used instead.
    :raises AssertionError: if two used samples share the same `short_name`.
    """
    # Save samples #
    self.name = name
    self.samples = samples
    self.children = samples
    # Check names are unique (only among the samples flagged as used) #
    names = [sample.short_name for sample in samples if sample.used]
    assert len(names) == len(set(names))
    # Figure out pools: deduplicated, then ordered by their `id_name` #
    unique_pools = set([sample.pool for sample in self.samples])
    self.pools = sorted(unique_pools, key=lambda pool: pool.id_name)
    # Load them #
    for pool in self.pools:
        pool.load()
    # Dir #
    self.base_dir = base_dir or (illumitag.view_dir + "clusters/" + self.name + '/')
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Runner #
    self.runner = ClusterRunner(self)
    # FASTA #
    self.reads = FASTA(self.p.all_reads_fasta)
    # OTU picking #
    self.otu_uparse = UparseOTUs(self)
    self.otu_uclust = UclustOTUs(self)
    self.otu_cdhit = CdhitOTUs(self)
    # Reporting #
    self.reporter = ClusterReporter(self)
def __init__(self, parent):
    """Attach a statistics object to its parent.

    :param parent: Owning object (also stored as `composition`); its
                   `p.stats_dir` and `taxa_csv` attributes are read here.
    """
    # Save parent #
    self.composition = parent
    self.parent = parent
    # Paths #
    stats_dir = self.parent.p.stats_dir
    self.p = AutoPaths(stats_dir, self.all_paths)
    # Children: the NMDS is fed the parent's taxa table #
    self.nmds = NMDS(self, self.parent.taxa_csv)
def __init__(self, fasta, base_dir):
    """Record the input sequences and output directory, then add a composition.

    :param fasta: Either a `FASTA` instance (used as is) or anything else,
                  which is wrapped in `FASTA(...)`.
    :param base_dir: Output directory; the composition is placed in a
                     `comp_<short_name>/` sub-directory of it.
    """
    # Base params #
    if not isinstance(fasta, FASTA):
        fasta = FASTA(fasta)
    self.fasta = fasta
    self.base_dir = base_dir
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Extra simple composition (imported here rather than at module level) #
    from illumitag.clustering.composition import SimpleComposition
    comp_dir = self.base_dir + 'comp_' + self.short_name + '/'
    self.composition = SimpleComposition(self, comp_dir)
def __init__(self, name, pools, out_dir):
    """Store the name and pools, and derive the output directory.

    :param name: Name of this object; also the name of its sub-directory.
    :param pools: Stored unchanged as `self.pools`.
    :param out_dir: Parent directory; `name + '/'` is appended to it.
    """
    # Attributes #
    self.loaded = False
    self.name = name
    self.pools = pools
    # Dir #
    self.base_dir = out_dir + self.name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
def __init__(self, parent, base_dir):
    """Create a minimal composition holding a single bar-stack graph.

    :param parent: Owning object, also stored as `taxonomy`.
    :param base_dir: Directory handed to `AutoPaths`.
    """
    # Save parent #
    self.parent = parent
    self.taxonomy = parent
    # Paths #
    self.base_dir = base_dir
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Simple graph, with its layout attributes overridden #
    graph = illumitag.clustering.composition.plots.TaxaBarstack(self)
    self.graph = graph
    graph.bottom = 0.40
    graph.legend_anchor = -0.3
    # NOTE(review): `formats` is set on this object, not on the graph -- confirm intended
    self.formats = ('pdf',)
def __init__(self, name, pools, projs_dir):
    """Store the name and pools, and derive the directory and metadata path.

    :param name: Name of this object; used for its sub-directory and for the
                 metadata CSV file name.
    :param pools: Stored as both `self.pools` and `self.children`.
    :param projs_dir: Parent directory; `name + '/'` is appended to it.
    """
    # Attributes #
    self.name = name
    self.pools = pools
    self.children = pools
    self.loaded = False
    # Dir #
    self.base_dir = projs_dir + self.name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Extra: metadata CSV under the repository's `projects/` directory #
    self.meta_data_path = illumitag.repos_dir + 'projects/' + self.name + '.csv'
def __init__(self, parent, csv, calc_distance=True):
    """Prepare an NMDS analysis and its graph.

    :param parent: Owning object (also stored as `stat`); its `p.nmds_dir`
                   becomes this object's base directory.
    :param csv: Input table, stored unchanged as `self.csv`.
    :param calc_distance: Flag stored as `self.calc_distance`.
    """
    # Save parent #
    self.stat = parent
    self.parent = parent
    # Input and options #
    self.csv = csv
    self.calc_distance = calc_distance
    # Paths #
    self.base_dir = self.parent.p.nmds_dir
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Graph (shares the same base directory) #
    self.graph = GraphNMDS(self, base_dir=self.base_dir)
def __init__(self, num, pools, out_dir):
    """Store the run number and pools, and derive the name and paths.

    :param num: Run number; the name becomes `"run%i" % num`.
    :param pools: Stored as both `self.pools` and `self.children`.
    :param out_dir: Parent directory; `name + '/'` is appended to it.
    """
    # Attributes #
    self.num = num
    self.name = "run%i" % num
    self.pools = pools
    self.children = pools
    self.loaded = False
    # Dir #
    self.base_dir = out_dir + self.name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Illumina report #
    # NOTE(review): `self.directory` is not assigned in this method; presumably
    # it is provided elsewhere on the class -- confirm.
    self.xml_report_path = self.directory + "report.xml"
def __init__(self, parent):
    """Create the three size fractions (low, med, big) below the parent.

    :param parent: Owning object (also stored as `pool`); its
                   `p.fractions_dir` becomes this object's base directory.
    """
    # Save parent #
    self.parent = parent
    self.pool = parent
    # Auto paths #
    self.base_dir = self.parent.p.fractions_dir
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Size fractions: each gets its own directory and a (lower, upper) bound pair #
    self.low = Fraction(self, self.p.low_dir, 430, 446)
    self.med = Fraction(self, self.p.med_dir, 447, 464)
    self.big = Fraction(self, self.p.big_dir, 465, 488)
    # Group them #
    self.children = [self.low, self.med, self.big]
def __init__(self, parent):
    """Attach the statistical analyses to a taxonomy object.

    :param parent: Owning object (also stored as `tax`); its `p.stats_dir`
                   and `otu_csv` attributes are read here.
    """
    # Save parent #
    self.tax = parent
    self.parent = parent
    # Paths #
    stats_dir = self.parent.p.stats_dir
    self.p = AutoPaths(stats_dir, self.all_paths)
    # R stuff #
    self.nmds = NMDS(self, self.parent.otu_csv)
    self.permanova = PERMANOVA(self)
    self.betadis = BetaDispersion(self)
    # Other #
    self.unifrac = Unifrac(self)
def __init__(self, parent, base_dir=None):
    """Prepare paths and the abundance table.

    :param parent: Owning object (also stored as `otu`); its `taxonomy`,
                   `samples` and (when `base_dir` is None) `p.seqenv`
                   attributes are read here.
    :param base_dir: Output directory; falls back to `parent.p.seqenv`.
    """
    # Parent #
    self.otu = parent
    self.parent = parent
    # Inherited #
    self.taxonomy = self.parent.taxonomy
    self.samples = self.parent.samples
    # Dir #
    self.base_dir = self.parent.p.seqenv if base_dir is None else base_dir
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Files #
    self.abundances = CSVTable(self.p.abundances)
def load(self):
    """Set up the paths and file objects of this sample.

    Does nothing when `self.info` has a truthy `'dummy'` entry. Otherwise the
    base directory is `<pool samples dir>/<bar_name>/` and the trimming
    values are copied from `self.pool.samples`.
    """
    # Special case for dummy samples #
    is_dummy = self.info.get('dummy')
    if is_dummy:
        return
    # Paths #
    self.base_dir = self.pool.p.samples_dir + self.bar_name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    self.path = str(self.p.orig_fastq)
    # Distances #
    self.trim_fwd = self.pool.samples.trim_fwd
    self.trim_rev = self.pool.samples.trim_rev
    # Files #
    self.trimmed = FASTQ(self.p.trimmed)
    self.renamed = FASTQ(self.p.renamed)
    self.fasta = FASTA(self.p.reads_fasta)
def __init__(self, fasta_path, base_dir, parent, verbose=False):
    """Record the inputs and declare the intermediate and result files.

    :param fasta_path: Handed to `FASTA` to wrap the input sequences.
    :param base_dir: Directory handed to `AutoPaths`.
    :param parent: Owning object, stored unchanged.
    :param verbose: Flag stored as `self.verbose`.
    """
    # Base #
    self.parent = parent
    self.verbose = verbose
    self.fasta = FASTA(fasta_path)
    # Auto paths #
    self.base_dir = base_dir
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Files wrapped as SizesFASTA #
    self.derep_cluster = SizesFASTA(self.p.derep_cluster)
    self.cluster_99 = SizesFASTA(self.p.cluster_99)
    self.positive = SizesFASTA(self.p.positive)
    self.negative = SizesFASTA(self.p.negative)
    # File wrapped as plain FASTA #
    self.subsampled = FASTA(self.p.subsampled)
def __init__(self, json_path, out_dir):
    """Build a sample from its JSON description.

    :param json_path: Path of the JSON file; its parsed content is kept in
                      `self.info`. The keys read here are `run_num`,
                      `project`, `project_name`, `sample_num`, `sample`,
                      `sample_name`, `group`, `files`, `primer` and
                      `predenoised`.
    :param out_dir: Parent directory; `<id_name>/` is appended to it.
    :raises Exception: if a `path` listed under `files` does not exist.
    """
    # Attributes #
    self.out_dir = out_dir
    self.json_path = FilePath(json_path)
    # Parse #
    with open(json_path) as handle:
        self.info = json.load(handle)
    info = self.info
    # Basic #
    self.run_num = info['run_num']
    self.project_short_name = info['project']
    self.project_long_name = info['project_name']
    # Own attributes #
    self.num = info['sample_num']
    self.short_name = info['sample']
    self.long_name = info['sample_name']
    self.name = 'run%i_sample%i' % (self.run_num, self.num)
    self.group = info['group']
    self.id_name = "run%03d-sample%02d" % (self.run_num, self.num)
    # SFF files: every listed path must exist #
    self.sff_files_info = info['files']
    for sff in self.sff_files_info:
        if os.path.exists(sff['path']):
            continue
        raise Exception("No file at %s" % sff['path'])
    # Automatic paths #
    self.base_dir = self.out_dir + self.id_name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Make an alias to the json #
    self.p.info_json.link_from(self.json_path, safe=True)
    # Pool dummy: this object acts as its own pool and parent #
    self.pool = self
    self.parent = self
    # Other dummy variables #
    self.bar_len = 0
    self.gziped = False
    self.used = True
    # Primer #
    self.primer_regex = re.compile(info['primer'])
    # Raw files #
    self.raw_fasta = FASTA(self.p.raw_fasta)
    self.raw_fastq = FASTQ(self.p.raw_fastq)
    # Standard FASTA #
    self.reads = FASTA(self.p.reads_fasta)
    self.fasta = FASTA(self.p.renamed)
    # Special FASTQ #
    self.fastq = FASTQ(self.p.reads_fastq)
    # A shameless hack for cdhit to work #
    self.renamed = self.fastq
    # Pre-denoised special case #
    # The trailing `and False` keeps this branch switched off; the key lookup
    # still runs, so `predenoised` must be present in the JSON.
    if self.info['predenoised'] and False:
        self.sff_files_info = []
        self.reads.link_from(self.info['predenoised'], safe=True)
def __init__(self, parent, base_dir=None):
    """Prepare the paths, plots, taxa table and statistics of a composition.

    :param parent: Owning object (also stored as `taxonomy`); its `samples`
                   and (when `base_dir` is None) `p.composition_dir`
                   attributes are read here.
    :param base_dir: Output directory; falls back to
                     `parent.p.composition_dir`.
    """
    # Parent #
    self.taxonomy = parent
    self.parent = parent
    # Inherited #
    self.samples = self.parent.samples
    # Dir #
    self.base_dir = self.parent.p.composition_dir if base_dir is None else base_dir
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Graphs: one instance per name listed in `plots.__all__` #
    plot_classes = (getattr(plots, cls_name) for cls_name in plots.__all__)
    self.graphs = [plot_cls(self) for plot_cls in plot_classes]
    # Taxa table #
    self.taxa_csv = CSVTable(self.p.taxa_csv)
    # Stats #
    self.stats = StatsOnComposition(self)
def __init__(self, parent):
    """Declare the alignment, tree and distance files of the analysis.

    :param parent: Owning object (also stored as `stat`); its `tax` and
                   `p.unifrac_dir` attributes are read here.
    """
    # Save parent #
    self.stat = parent
    self.parent = parent
    self.tax = parent.tax
    # Paths #
    unifrac_dir = self.parent.p.unifrac_dir
    self.p = AutoPaths(unifrac_dir, self.all_paths)
    # Alignment files #
    self.clustalo_aligned = FASTA(self.p.clustalo_align)
    self.pynast_aligned = FASTA(self.p.pynast_align)
    self.mothur_aligned = FASTA(self.p.mothur_align)
    # Tree files #
    self.raxml_tree = FilePath(self.p.raxml_tree)
    self.fasttree_tree = FilePath(self.p.fasttree_tree)
    # Distance table #
    self.distances_csv = CSVTable(self.p.distances_csv)
    # Graphs: the NMDS receives the distance table with calc_distance off #
    self.nmds = NMDS(self, self.distances_csv, calc_distance=False)
def __init__(self, parent, base_dir, lower_bound, upper_bound):
    """Set up one size fraction with its chimera checks and classifiers.

    :param parent: Owning object, also stored as `fractions`.
    :param base_dir: Directory handed to `AutoPaths`.
    :param lower_bound: Stored as `self.lower_bound`.
    :param upper_bound: Stored as `self.upper_bound`.
    """
    # Save parent #
    self.parent = parent
    self.fractions = parent
    # Bounds #
    self.lower_bound = lower_bound
    self.upper_bound = upper_bound
    # Auto paths #
    self.base_dir = base_dir
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Size fractions #
    self.reads = FASTA(self.p.reads_fasta)
    self.refere = UchimeRef(self.p.reads, self.p.refere_dir, self)
    self.denovo = UchimeDenovo(self.p.reads, self.p.denovo_dir, self)
    # Classification #
    self.rdp = SimpleRdpTaxonomy(self.reads, self.p.rdp_dir)
    self.crest = SimpleCrestTaxonomy(self.reads, self.p.crest_dir)
def __init__(self, parent):
    """Set up the read files of one primer outcome, then call `self.load()`.

    :param parent: Owning object (also stored as `assemble_group`); its
                   `samples`, `pool`, `p.groups_dir` and `cls` attributes
                   are read here. `parent.cls` is called to wrap the reads.
    """
    # Save parent #
    self.parent = parent
    self.assemble_group = parent
    self.samples = parent.samples
    self.pool = self.parent.pool
    # Auto paths #
    self.base_dir = parent.p.groups_dir + self.short_name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # More #
    self.orig_reads = self.parent.cls(self.p.orig_fastq, samples=self.samples)
    self.n_filtered = self.parent.cls(self.p.n_filtered, samples=self.samples)
    # Quality filtered #
    # NOTE(review): this compares the parent object itself with a string, so
    # the branch only runs if the parent's equality accepts 'assembled' --
    # confirm whether a name attribute was meant.
    parent_is_assembled = (self.parent == 'assembled')
    if parent_is_assembled:
        self.qual_filtered = FASTQ(self.p.qual_filtered, samples=self.samples)
        self.len_filtered = FASTQ(self.p.len_filtered_fastq, samples=self.samples)
        self.trimmed_barcodes = FASTA(self.p.trimmed_barcodes)
    # Further #
    self.load()
def __init__(self, cluster):
    """Declare the OTU files and the taxonomies built on the OTU centers.

    :param cluster: Owning object (also stored as `parent`); its `samples`,
                    `reads` and `p.otus_dir` attributes are read here.
    """
    # Save parent #
    self.cluster = cluster
    self.parent = cluster
    # Inherited #
    self.samples = self.parent.samples
    # Paths #
    self.base_dir = self.parent.p.otus_dir + self.short_name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Main FASTA file (shared with the parent) #
    self.reads = self.parent.reads
    # Files resolved through AutoPaths #
    self.all_otus = FilePath(self.p.all_otus)
    self.all_centers = FASTA(self.p.all_centers)
    # Files built directly from the base directory #
    self.otus = FilePath(self.base_dir + "otus.txt")
    self.centers = FASTA(self.base_dir + "centers.fasta")
    # Taxonomy #
    self.taxonomy_silva = CrestTaxonomy(self.centers, self, 'silvamod', self.p.silva)
    self.taxonomy_fw = CrestTaxonomy(self.centers, self, 'freshwater', self.p.fw_dir)
    # Preferred one #
    self.taxonomy = self.taxonomy_silva
def __init__(self, path, parent):
    """Declare the files derived from one FASTQ of reads.

    :param path: Handed to `FASTQ` to wrap the untrimmed reads.
    :param parent: Owning object (also stored as `pool`); its `samples` and
                   `p.quality_dir` attributes are read here.
    """
    # Save parent #
    self.parent = parent
    self.pool = parent
    self.samples = parent.samples
    # Auto paths #
    self.base_dir = parent.p.quality_dir + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Files #
    self.untrimmed = FASTQ(path, samples=self.samples)
    self.only_used = FASTA(self.p.only_used, samples=self.samples)
    self.trimmed = FASTA(self.p.trimmed)
    # Qiime output #
    self.qiime_fasta = FASTA(self.p.qiime_fasta)
    # Mothur #
    self.mothur_fasta = FASTA(self.p.mothur_fasta)
    self.mothur_qual = QualFile(self.p.mothur_qual)
    self.mothur_groups = GroupFile(self.p.mothur_groups)
    # Primer size: copied from the pool's samples object #
    self.trim_fwd = self.pool.samples.trim_fwd
    self.trim_rev = self.pool.samples.trim_rev
def __init__(self, parent):
    """Set up the read paths, assembly children and plots of one group.

    :param parent: Owning object (also stored as `pool`); its `samples` and
                   `p.groups_dir` attributes are read here.
    """
    # Save parent #
    self.parent = parent
    self.pool = parent
    self.samples = parent.samples
    # Paths #
    self.base_dir = self.pool.p.groups_dir + self.short_name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Super #
    self.fwd_path = self.p.fwd_fastq
    self.rev_path = self.p.rev_fastq
    # Flag set from whether the forward path ends in 'gz' #
    self.gziped = bool(self.fwd_path.endswith('gz'))
    # Add assembly files #
    self.assembled = Assembled(self)
    self.unassembled = Unassembled(self)
    self.children = (self.assembled, self.unassembled)
    self.first = self.assembled
    # Graphs: one instance per name listed in `outcome_plots.__all__` #
    plot_classes = (getattr(outcome_plots, cls_name) for cls_name in outcome_plots.__all__)
    self.graphs = [plot_cls(self) for plot_cls in plot_classes]
def __init__(self, cluster):
    """Declare the clustering files and the taxonomies built on the centers.

    :param cluster: Owning object (also stored as `parent`); its `samples`
                    and `p.otus_dir` attributes are read here.
    """
    # Save parent #
    self.cluster = cluster
    self.parent = cluster
    # Inherited #
    self.samples = self.parent.samples
    # Paths #
    self.base_dir = self.parent.p.otus_dir + self.short_name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Main reads file here FASTQ #
    self.reads = FASTQ(self.p.all_reads)
    # Files #
    self.cdhit_clusters = FilePath(self.p.clstr)
    otu_prefix = self.p.clusters_dir + "OTU"
    self.cdhit_centers = FASTA(otu_prefix)
    self.centers = FASTA(self.p.centers)
    # Taxonomy #
    self.taxonomy_silva = CrestTaxonomy(self.centers, self, 'silvamod', self.p.silva)
    self.taxonomy_fw = CrestTaxonomy(self.centers, self, 'freshwater', self.p.fw_dir)
    # Preferred one #
    self.taxonomy = self.taxonomy_silva
def __init__(self, parent):
    """Attach this group to its outcome and create the five primer outcomes.

    :param parent: Owning object (also stored as `outcome`); its `samples`
                   and `parent` attributes are read here. The pool is taken
                   to be `parent.parent`.
    """
    # Save parent #
    self.parent = parent
    self.outcome = parent
    self.samples = parent.samples
    # Extra: the pool is the outcome's parent; `samples` is then re-read from it #
    self.pool = self.outcome.parent
    self.samples = self.pool.samples
    self.primers = self.pool.primers
    # Load #
    # `self.base_dir` is not assigned in this method; presumably `load()`
    # provides it before it is used below -- confirm.
    self.load()
    # Auto paths #
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # All primer outcomes #
    self.good_primers = GoodPrimers(self)
    self.wrong_primers = WrongPrimers(self)
    self.only_fwd_primers = OnlyFwdPrimers(self)
    self.only_rev_primers = OnlyRevPrimers(self)
    self.no_primers = NoPrimers(self)
    # Group them #
    self.children = (
        self.good_primers,
        self.wrong_primers,
        self.only_fwd_primers,
        self.only_rev_primers,
        self.no_primers,
    )
    self.first = self.good_primers
def __init__(self, fasta_path, parent, base_dir=None):
    """Set up paths, plots, the OTU table and statistics for an assignment.

    :param fasta_path: Handed to `FASTA` to wrap the sequences being classified.
    :param parent: Owning object (also stored as `otu`); its `samples` and
                   (when `base_dir` is None) `p.rdp_dir` attributes are read.
    :param base_dir: Output directory; falls back to `parent.p.rdp_dir`.
    """
    # Parent #
    self.otu = parent
    self.parent = parent
    # Inherited #
    self.samples = self.parent.samples
    # FASTA #
    self.fasta = FASTA(fasta_path)
    # Dir #
    self.base_dir = self.parent.p.rdp_dir if base_dir is None else base_dir
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Graphs: one instance per name listed in `plots.__all__` #
    plot_classes = (getattr(plots, cls_name) for cls_name in plots.__all__)
    self.graphs = [plot_cls(self) for plot_cls in plot_classes]
    # Tables #
    self.otu_csv = CSVTable(self.p.otu_csv)
    # Composition tables (currently disabled) #
    #self.comp_phyla = CompositionPhyla(self, self.p.comp_phyla)
    #self.comp_tips = CompositionTips(self, self.p.comp_tips)
    # Stats #
    self.stats = StatsOnTaxonomy(self)
def __init__(self, cluster):
    """Declare the clustering files, taxonomies and source tracking.

    :param cluster: Owning object (also stored as `parent`); its `samples`,
                    `reads` and `p.otus_dir` attributes are read here.
    """
    # Save parent #
    self.cluster = cluster
    self.parent = cluster
    # Inherited #
    self.samples = self.parent.samples
    # Paths #
    self.base_dir = self.parent.p.otus_dir + self.short_name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Main FASTA file (shared with the parent) #
    self.reads = self.parent.reads
    # Files #
    self.derep = SizesFASTA(self.p.derep)
    self.sorted = SizesFASTA(self.p.sorted)
    self.centers = FASTA(self.p.centers)
    self.readmap = UClusterFile(self.p.readmap)
    # Taxonomy: all three are built on the same centers file #
    self.taxonomy_silva = CrestTaxonomy(self.centers, self, 'silvamod', self.p.silva_dir)
    self.taxonomy_fw = CrestTaxonomy(self.centers, self, 'freshwater', self.p.fw_dir)
    self.taxonomy_rpd = RdpTaxonomy(self.centers, self)
    # Preferred one #
    self.taxonomy = self.taxonomy_silva
    # Source tracking #
    self.seqenv = Seqenv(self)
def load(self):
    """Delayed second stage of initialisation; avoids circular references.

    Builds the paths, raw read files, barcode outcomes, quality reads,
    fractions, runner and plots, then sets `self.loaded` to True.

    :returns: `self`, so the call can be chained.
    """
    # Automatic paths #
    self.base_dir = self.out_dir + self.id_name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Make an alias to the json #
    self.p.info_json.link_from(self.json_path, safe=True)
    # Children #
    self.samples.load()
    # Raw file pairs: both live under the INBOX for this run and pool label #
    inbox_template = "ILLUMITAG/INBOX/%s/%s/%s"
    self.fwd_path = home + inbox_template % (self.run.label, self.label, self.info['forward_reads'])
    self.rev_path = home + inbox_template % (self.run.label, self.label, self.info['reverse_reads'])
    self.fwd = FASTQ(self.fwd_path)
    self.rev = FASTQ(self.rev_path)
    self.fastq = PairedFASTQ(self.fwd.path, self.rev.path, self)
    # Barcode length #
    self.bar_len = self.samples.bar_len
    # Make Outcomes #
    self.no_barcodes = NoBarcode(self)
    self.one_barcodes = OneBarcode(self)
    self.same_barcodes = SameBarcode(self)
    self.bad_barcodes = BadBarcode(self)
    self.good_barcodes = GoodBarcode(self)
    # The good outcome is listed first even though it is created last #
    self.outcomes = (
        self.good_barcodes,
        self.no_barcodes,
        self.one_barcodes,
        self.same_barcodes,
        self.bad_barcodes,
    )
    self.children = self.outcomes
    # The good reads #
    best_reads = self.good_barcodes.assembled.good_primers.len_filtered
    self.quality_reads = QualityReads(best_reads, self)
    self.fractions = Fractions(self)
    # Runner #
    self.runner = PoolRunner(self)
    # Graphs: one instance per name listed in `pool_plots.__all__` #
    plot_classes = (getattr(pool_plots, cls_name) for cls_name in pool_plots.__all__)
    self.graphs = [plot_cls(self) for plot_cls in plot_classes]
    # Loaded #
    self.loaded = True
    # Return self for convenience #
    return self
def __init__(self, json_path, out_dir):
    """Build a pre-demultiplexed sample from its JSON description.

    The object acts as its own pool and parent, and carries a one-entry
    dummy `samples` list describing itself.

    :param json_path: Path of the JSON file; its parsed content is kept in
                      `self.info`. The keys read here are `uppmax_id`,
                      `run_num`, `run_id`, `project`, `project_name`,
                      `forward_reads`, `reverse_reads`, `sample_num`,
                      `sample_id`, `sample`, `sample_name`, `group`,
                      `forward_mid` and, when present, `reverse_mid`.
    :param out_dir: Parent directory; `<id_name>/` is appended to it.
    """
    # Attributes #
    self.out_dir = out_dir
    self.json_path = FilePath(json_path)
    # Parse #
    with open(json_path) as handle:
        self.info = json.load(handle)
    # Basic #
    self.account = self.info['uppmax_id']
    self.run_num = self.info['run_num']
    self.run_label = self.info['run_id']
    self.project_short_name = self.info['project']
    self.project_long_name = self.info['project_name']
    self.fwd_name = self.info['forward_reads']
    self.rev_name = self.info['reverse_reads']
    # Own attributes #
    self.num = self.info['sample_num']
    self.label = self.info['sample_id']
    self.short_name = self.info['sample']
    self.long_name = self.info['sample_name']
    self.name = 'run%i_sample%i' % (self.run_num, self.num)
    self.group = self.info['group']
    self.id_name = "run%03d-sample%02d" % (self.run_num, self.num)
    self.fwd_mid = self.info['forward_mid']
    # The reverse MID used to be read from 'forward_mid' (copy-paste slip).
    # Prefer the 'reverse_mid' entry and fall back to the forward value so
    # that JSON files without that key keep their previous behaviour.
    self.rev_mid = self.info.get('reverse_mid', self.fwd_mid)
    # Automatic paths #
    self.base_dir = self.out_dir + self.id_name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Special #
    self.primers = TwoPrimers(self)
    # Samples dummy: a single self-describing entry #
    self.info['samples'] = [{
        "name": self.short_name,
        "used": 1,
        "group": self.group,
        "dummy": 1,
        "num": self.num,
        "fwd": "",
        "rev": "",
    }]
    self.samples = Samples(self)
    self.samples.load()
    # Pool dummy: this object acts as its own pool and parent #
    self.pool, self.parent = self, self
    # Files #
    self.fwd_path = home + "ILLUMITAG/INBOX/%s/%s/%s" % (self.run_label, self.label, self.fwd_name)
    self.rev_path = home + "ILLUMITAG/INBOX/%s/%s/%s" % (self.run_label, self.label, self.rev_name)
    self.gziped = bool(self.fwd_path.endswith('gz'))
    # Note: the FASTQ objects wrap the auto-paths `p.fwd` / `p.rev`, not the
    # INBOX paths computed just above.
    self.fwd = FASTQ(self.p.fwd)
    self.rev = FASTQ(self.p.rev)
    self.fastq = PairedFASTQ(self.fwd.path, self.rev.path, self)
    # Barcode length #
    self.bar_len = 0
    # Make an alias to the json #
    self.p.info_json.link_from(self.json_path, safe=True)
    # Assembly files as children #
    self.assembled = Assembled(self)
    self.unassembled = Unassembled(self)
    self.children = (self.assembled, self.unassembled)
    self.first = self.assembled
    # Graphs: one instance per name listed in `outcome_plots.__all__` #
    self.graphs = [getattr(outcome_plots, cls_name)(self) for cls_name in outcome_plots.__all__]
    # Runner #
    self.runner = PresampleRunner(self)
    # Final #
    self.trimmed = FASTQ(self.p.trimmed)
    self.renamed = FASTQ(self.p.renamed)
    self.fasta = FASTA(self.p.reads_fasta)
def __init__(self, parent):
    """Attach this analysis to its parent statistics object.

    :param parent: Owning object (also stored as `stat`); its `tax` and
                   `p.betadis_dir` attributes are read here.
    """
    # Save parent #
    self.stat = parent
    self.parent = parent
    self.tax = parent.tax
    # Paths #
    betadis_dir = self.parent.p.betadis_dir
    self.p = AutoPaths(betadis_dir, self.all_paths)
def load(self):
    """Set the read class, paths and flipped-reads file for this group.

    The base directory is taken from `self.outcome.p.assembled_dir`, and
    `self.cls` is set to `FASTQ`.
    """
    # Class used to wrap reads #
    self.cls = FASTQ
    # Paths #
    self.base_dir = self.outcome.p.assembled_dir
    self.p = AutoPaths(self.base_dir, self.all_paths)
    self.path = self.p.orig_fastq
    # Files #
    flipped_path = self.p.flipped
    self.flipped_reads = FASTQ(flipped_path, self.samples, self.primers)