Exemple #1
0
 def __init__(self, parent, base_dir=None, short_name=None):
     # Save parent #
     self.parent = parent
     # Base dir #
     if not base_dir: self.base_dir = self.parent.p.graphs_dir
     else: self.base_dir = base_dir
     # Short name #
     if short_name: self.short_name = short_name
     # Paths #
     self.path = FilePath(self.base_dir + self.short_name + '.pdf')
     self.csv_path = self.path.replace_extension('csv')
     # Extra #
     self.dev_mode = False
Exemple #2
0
class Graph(object):
    width = 12.0
    height = 7.0
    bottom = 0.14
    top = 0.93
    left = 0.06
    right = 0.98
    formats = ('pdf',)

    def __init__(self, parent, base_dir=None, short_name=None):
        # Save parent #
        self.parent = parent
        # Base dir #
        if not base_dir: self.base_dir = self.parent.p.graphs_dir
        else: self.base_dir = base_dir
        # Short name #
        if short_name: self.short_name = short_name
        # Paths #
        self.path = FilePath(self.base_dir + self.short_name + '.pdf')
        self.csv_path = self.path.replace_extension('csv')
        # Extra #
        self.dev_mode = False

    def save_plot(self, fig, axes, width=None, height=None, bottom=None, top=None, left=None, right=None, sep=()):
        # Attributes or parameters #
        w = width  if width  != None else self.width
        h = height if height != None else self.height
        b = bottom if bottom != None else self.bottom
        t = top    if top    != None else self.top
        l = left   if left   != None else self.left
        r = right  if right  != None else self.right
        # Adjust #
        fig.set_figwidth(w)
        fig.set_figheight(h)
        fig.subplots_adjust(hspace=0.0, bottom=b, top=t, left=l, right=r)
        # Data and source #
        if self.dev_mode:
            fig.text(0.99, 0.98, time.asctime(), horizontalalignment='right')
            job_name = os.environ.get('SLURM_JOB_NAME', 'Unnamed')
            user_msg = 'user: %s, job: %s' % (getpass.getuser(), job_name)
            fig.text(0.01, 0.98, user_msg, horizontalalignment='left')
        # Nice digit grouping #
        if 'x' in sep:
            seperate = lambda x,pos: split_thousands(x)
            axes.xaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(seperate))
        if 'y' in sep:
            seperate = lambda y,pos: split_thousands(y)
            axes.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(seperate))
        # Save it as different formats #
        for ext in self.formats: fig.savefig(self.path.replace_extension(ext))
Exemple #3
0
 def __init__(self, parent):
     # Save parent #
     self.stat, self.parent = parent, parent
     self.tax = parent.tax
     # Paths #
     self.p = AutoPaths(self.parent.p.unifrac_dir, self.all_paths)
     # Files #
     self.clustalo_aligned = FASTA(self.p.clustalo_align)
     self.pynast_aligned = FASTA(self.p.pynast_align)
     self.mothur_aligned = FASTA(self.p.mothur_align)
     self.raxml_tree = FilePath(self.p.raxml_tree)
     self.fasttree_tree = FilePath(self.p.fasttree_tree)
     self.distances_csv = CSVTable(self.p.distances_csv)
     # Graphs #
     self.nmds = NMDS(self, self.distances_csv, calc_distance=False)
Exemple #4
0
 def __init__(self, cluster):
     # Save parent #
     self.cluster, self.parent = cluster, cluster
     # Inherited #
     self.samples = self.parent.samples
     # Paths #
     self.base_dir = self.parent.p.otus_dir + self.short_name + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Main FASTA file #
     self.reads = self.parent.reads
     # Files #
     self.all_otus = FilePath(self.p.all_otus)
     self.all_centers = FASTA(self.p.all_centers)
     self.otus = FilePath(self.base_dir + "otus.txt")
     self.centers = FASTA(self.base_dir + "centers.fasta")
     # Taxonomy #
     self.taxonomy_silva = CrestTaxonomy(self.centers, self, 'silvamod', self.p.silva)
     self.taxonomy_fw = CrestTaxonomy(self.centers, self, 'freshwater', self.p.fw_dir)
     # Preferred one #
     self.taxonomy = self.taxonomy_silva
Exemple #5
0
 def __init__(self, json_path, out_dir):
     # Attributes #
     self.out_dir = out_dir
     self.json_path = FilePath(json_path)
     # Parse #
     with open(json_path) as handle:
         self.info = json.load(handle)
     # Basic #
     self.run_num = self.info['run_num']
     self.project_short_name = self.info['project']
     self.project_long_name = self.info['project_name']
     # Own attributes #
     self.num = self.info['sample_num']
     self.short_name = self.info['sample']
     self.long_name = self.info['sample_name']
     self.name = 'run%i_sample%i' % (self.run_num, self.num)
     self.group = self.info['group']
     self.id_name = "run%03d-sample%02d" % (self.run_num, self.num)
     # SFF files #
     self.sff_files_info = self.info['files']
     for f in self.sff_files_info:
         if not os.path.exists(f['path']):
             raise Exception("No file at %s" % f['path'])
     # Automatic paths #
     self.base_dir = self.out_dir + self.id_name + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Make an alias to the json #
     self.p.info_json.link_from(self.json_path, safe=True)
     # Pool dummy #
     self.pool, self.parent = self, self
     # Other dummy variables #
     self.bar_len = 0
     self.gziped = False
     self.used = True
     # Primer #
     self.primer_regex = re.compile(self.info['primer'])
     # Raw files #
     self.raw_fasta = FASTA(self.p.raw_fasta)
     self.raw_fastq = FASTQ(self.p.raw_fastq)
     # Standard FASTA #
     self.reads = FASTA(self.p.reads_fasta)
     self.fasta = FASTA(self.p.renamed)
     # Special FASTQ #
     self.fastq = FASTQ(self.p.reads_fastq)
     # A shameless hack for cdhit to work #
     self.renamed = self.fastq
     # Pre-denoised special case #
     if self.info['predenoised'] and False:
         self.sff_files_info = []
         self.reads.link_from(self.info['predenoised'], safe=True)
Exemple #6
0
 def __init__(self, cluster):
     # Save parent #
     self.cluster, self.parent = cluster, cluster
     # Inherited #
     self.samples = self.parent.samples
     # Paths #
     self.base_dir = self.parent.p.otus_dir + self.short_name + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Main reads file here FASTQ #
     self.reads = FASTQ(self.p.all_reads)
     # Files #
     self.cdhit_clusters = FilePath(self.p.clstr)
     self.cdhit_centers = FASTA(self.p.clusters_dir + "OTU")
     self.centers = FASTA(self.p.centers)
     # Taxonomy #
     self.taxonomy_silva = CrestTaxonomy(self.centers, self, 'silvamod',
                                         self.p.silva)
     self.taxonomy_fw = CrestTaxonomy(self.centers, self, 'freshwater',
                                      self.p.fw_dir)
     # Preferred one #
     self.taxonomy = self.taxonomy_silva
Exemple #7
0
class UclustOTUs(OTUs):
    """Will use uclust via the qimme wraper to create OTU clusters from a given FASTA file
    http://qiime.org/scripts/pick_otus.html"""

    short_name = 'uclust'
    title = 'UCLUST-QIIME denovo picking'

    all_paths = """
    /clusters/clusters.uc
    /clusters/qiime.log
    /clusters/all_otus.txt
    /clusters/all_centers.fasta
    /centers.fasta
    /otus.txt
    /taxonomy_silva/
    /taxonomy_fw/
    /graphs/
    """

    def __repr__(self): return '<%s object of %s>' % (self.__class__.__name__, self.parent)

    def __init__(self, cluster):
        # Save parent #
        self.cluster, self.parent = cluster, cluster
        # Inherited #
        self.samples = self.parent.samples
        # Paths #
        self.base_dir = self.parent.p.otus_dir + self.short_name + '/'
        self.p = AutoPaths(self.base_dir, self.all_paths)
        # Main FASTA file #
        self.reads = self.parent.reads
        # Files #
        self.all_otus = FilePath(self.p.all_otus)
        self.all_centers = FASTA(self.p.all_centers)
        self.otus = FilePath(self.base_dir + "otus.txt")
        self.centers = FASTA(self.base_dir + "centers.fasta")
        # Taxonomy #
        self.taxonomy_silva = CrestTaxonomy(self.centers, self, 'silvamod', self.p.silva)
        self.taxonomy_fw = CrestTaxonomy(self.centers, self, 'freshwater', self.p.fw_dir)
        # Preferred one #
        self.taxonomy = self.taxonomy_silva

    def run(self):
        # Clean #
        shutil.rmtree(self.p.clusters_dir)
        # Run command #
        pick_otus = sh.Command('pick_otus.py')
        pick_otus('-m', 'uclust', '-i', self.reads, '-o', self.p.clusters_dir)
        # Move into place #
        base_name = self.p.clusters_dir + self.reads.prefix
        shutil.move(base_name + '_otus.txt', self.all_otus)
        shutil.move(base_name + '_otus.log', self.p.qiime_log)
        shutil.move(base_name + '_clusters.uc', self.p.clusters_uc)
        # Remove OTUs that are only one read #
        def filter_singletons(f):
            for line in f:
                line = line.split()
                if len(line) > 2: yield '\t'.join(line) + '\n'
        self.otus.writelines(filter_singletons(self.all_otus))
        # Create the centers file that is missing #
        pick_rep = sh.Command('pick_rep_set.py')
        pick_rep('-i', self.all_otus, '-f', self.reads, '-o', self.all_centers)
        # Remake the centers file without the filtered OTUs #
        self.otus_to_keep = [line.split()[0] for line in self.otus]
        def filter_otus(f):
            for seq in f:
                if seq.id in self.otus_to_keep: yield seq
        self.centers.write(filter_otus(self.all_centers))

    @property_cached
    def cluster_counts_table(self):
        """Create the unfiltered OTU table"""
        # Put results in a dict of dicts #
        result = defaultdict(lambda: defaultdict(int))
        # Loop #
        for line in self.otus:
            # Parse the line #
            contents = line.split()
            otu, reads = contents[0], contents[1:]
            # Parse the hits #
            for r in reads:
                nums = re.findall("run([0-9]+)_pool([0-9]+)_sample([0-9]+)_read([0-9]+)", r)
                if nums:
                    run_num, pool_num, sample_num, read_num = map(int, nums[0])
                    sample = illumitag.runs[run_num][pool_num-1][sample_num-1]
                    name = sample.short_name
                else:
                    nums = re.findall("run([0-9]+)_sample([0-9]+)_read([0-9]+)", r)
                    run_num, sample_num, read_num = map(int, nums[0])
                    sample = [s for s in illumitag.presamples+illumitag.pyrosamples if s.run_num==run_num and s.num==sample_num][0]
                    name = sample.short_name
                # Count #
                result[otu][name] += 1
        # Return #
        result = pandas.DataFrame(result)
        result = result.fillna(0)
        result = result.astype(int)
        result = result.reindex_axis(sorted(result.columns, key=natural_sort), axis=1)
        return result
Exemple #8
0
 def __init__(self, json_path, out_dir):
     # Attributes #
     self.out_dir = out_dir
     self.json_path = FilePath(json_path)
     # Parse #
     with open(json_path) as handle:
         self.info = json.load(handle)
     # Basic #
     self.account = self.info['uppmax_id']
     self.run_num = self.info['run_num']
     self.run_label = self.info['run_id']
     self.project_short_name = self.info['project']
     self.project_long_name = self.info['project_name']
     self.fwd_name = self.info['forward_reads']
     self.rev_name = self.info['reverse_reads']
     # Own attributes #
     self.num = self.info['sample_num']
     self.label = self.info['sample_id']
     self.short_name = self.info['sample']
     self.long_name = self.info['sample_name']
     self.name = 'run%i_sample%i' % (self.run_num, self.num)
     self.group = self.info['group']
     self.id_name = "run%03d-sample%02d" % (self.run_num, self.num)
     self.fwd_mid = self.info['forward_mid']
     self.rev_mid = self.info['forward_mid']
     # Automatic paths #
     self.base_dir = self.out_dir + self.id_name + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # Special #
     self.primers = TwoPrimers(self)
     # Samples dummy #
     self.info['samples'] = [{
         "name": self.short_name,
         "used": 1,
         "group": self.group,
         "dummy": 1,
         "num": self.num,
         "fwd": "",
         "rev": ""
     }]
     self.samples = Samples(self)
     self.samples.load()
     # Pool dummy #
     self.pool, self.parent = self, self
     # Files #
     self.fwd_path = home + "ILLUMITAG/INBOX/%s/%s/%s" % (
         self.run_label, self.label, self.fwd_name)
     self.rev_path = home + "ILLUMITAG/INBOX/%s/%s/%s" % (
         self.run_label, self.label, self.rev_name)
     self.gziped = True if self.fwd_path.endswith('gz') else False
     self.fwd = FASTQ(self.p.fwd)
     self.rev = FASTQ(self.p.rev)
     self.fastq = PairedFASTQ(self.fwd.path, self.rev.path, self)
     # Barcode length #
     self.bar_len = 0
     # Make an alias to the json #
     self.p.info_json.link_from(self.json_path, safe=True)
     # Assembly files as children #
     self.assembled = Assembled(self)
     self.unassembled = Unassembled(self)
     self.children = (self.assembled, self.unassembled)
     self.first = self.assembled
     # Graphs #
     self.graphs = [
         getattr(outcome_plots, cls_name)(self)
         for cls_name in outcome_plots.__all__
     ]
     # Runner #
     self.runner = PresampleRunner(self)
     # Final #
     self.trimmed = FASTQ(self.p.trimmed)
     self.renamed = FASTQ(self.p.renamed)
     self.fasta = FASTA(self.p.reads_fasta)
Exemple #9
0
 def __new__(cls, path=None, content=None, **kwargs):
     handle = open(path, 'w') if path else tempfile.NamedTemporaryFile(
         delete=False, **kwargs)
     if content: handle.write(content)
     handle.close()
     return FilePath.__new__(cls, handle.name)