Exemple #1
0
 def __init__(self, profile, taxfile, outfile, group_file=None, expand=1):
     """Create the parsers and the empty containers filled in later.

     profile is handed to TableParser, taxfile to TaxParser and group_file
     to parse_group_file; outfile and expand are stored unchanged.
     """
     # Parsers are created in this order: table, taxonomy, then grouping.
     self.table = TableParser(profile)
     self.otu_tax = TaxParser(taxfile)
     self.group = parse_group_file(group_file)

     # Distinct group labels, or None when no grouping was parsed.
     self.groups = (
         None if self.group is None
         else list(set(self.group.itervalues()))
     )

     self.outfile = outfile
     self.expand = expand

     # Rank names, root first.
     self.tax_level = [
         'kingdom', 'phylum', 'class', 'order',
         'family', 'genus', 'species',
     ]
     # One-letter prefix -> rank name ('k' -> 'kingdom', ...); every rank
     # starts with a different letter, so the initial is a unique key.
     self.tax_dict = {rank[0]: rank for rank in self.tax_level}

     # Containers populated after construction.
     self.samples = {}
     self.node_list = {}
     self.root_nodes = {}
from __future__ import division
import sys
import os
import argparse
from util import mkdir

this_script_path = os.path.dirname(__file__)
sys.path.insert(1, this_script_path + '/../src')
from Parser import TableParser


def read_params(args):
    """Parse the command line and return the options as a dict.

    Note that the ``args`` parameter is not consulted: ``parse_args()`` is
    called without arguments, so argparse reads ``sys.argv`` itself.

    Returns a dict with the keys ``infile`` and ``outfile`` (both required
    on the command line).
    """
    # (flags, destination, help text) for each required FILE option.
    option_specs = (
        (('-i', '--infile'), 'infile',
         "set the un-uniformed profile produced by biom convert"),
        (('-o', '--out_file'), 'outfile',
         "set the output uniform profile"),
    )

    parser = argparse.ArgumentParser(
        description='get otu profile uniform | v1.0 at 2015/09/29 by liangzb')
    for flags, dest, help_text in option_specs:
        parser.add_argument(*flags, dest=dest, metavar='FILE', type=str,
                            required=True, help=help_text)

    namespace = parser.parse_args()
    return vars(namespace)


if __name__ == '__main__':
    # Script entry point: read the options, make sure the output directory
    # exists, then parse, uniform and write the table.
    params = read_params(sys.argv)

    out_path = params['outfile']
    # NOTE(review): dirname is '' when out_path has no directory part;
    # confirm that util.mkdir accepts an empty path.
    mkdir(os.path.dirname(out_path))

    table = TableParser(params['infile'])
    table.parse_table()
    table.get_uniform()
    table.write_table(out_path)
Exemple #3
0
class Subject(object):
    """Build a taxonomy tree for an OTU table and accumulate counts on it.

    The table, the OTU taxonomy and the optional sample grouping come from
    TableParser, TaxParser and parse_group_file. Tree nodes are Node objects;
    this class reads and writes their ``level``, ``name``, ``profile``,
    ``group_profile`` and ``sub_node`` attributes.
    """

    def __init__(self, profile, taxfile, outfile, group_file=None, expand=1):
        """Create the parsers and the empty containers filled in later.

        profile is handed to TableParser, taxfile to TaxParser and group_file
        to parse_group_file; outfile and expand are stored unchanged.
        """
        self.table = TableParser(profile)
        self.otu_tax = TaxParser(taxfile)
        self.group = parse_group_file(group_file)
        if self.group is not None:
            # Distinct group labels.
            self.groups = list(set(self.group.values()))
        else:
            self.groups = None
        self.outfile = outfile
        self.expand = expand
        # Rank names, root first.
        self.tax_level = ['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']
        # One-letter prefix -> rank name.
        self.tax_dict = {
            'k': 'kingdom',
            'p': 'phylum',
            'c': 'class',
            'o': 'order',
            'f': 'family',
            'g': 'genus',
            's': 'species',
        }
        self.samples = {}
        self.node_list = {}
        self.root_nodes = {}

    def read_table(self):
        """Parse and uniform the table, then take over its sample list."""
        self.table.parse_table()
        self.table.get_uniform()
        self.samples = self.table.sample_list

    def read_tax(self):
        """Parse the taxonomy and replace the parser by its ``OTUs`` mapping."""
        self.otu_tax.parse()
        self.otu_tax.remove_other()
        # From here on self.otu_tax is the OTU -> {level: name} mapping.
        self.otu_tax = self.otu_tax.OTUs

    def get_tax_tree(self):
        """Create one Node per taxon name and link each to its parent.

        Nodes are registered in ``self.node_list`` by taxon name; nodes of
        the first rank in ``self.tax_level`` are also stored in
        ``self.root_nodes``.

        Raises:
            KeyError: if a non-empty OTU taxonomy lacks the root rank (the
                offending taxonomy is printed first).
        """
        root_level = self.tax_level[0]
        for otu_tax in self.otu_tax.values():
            father_level = root_level
            try:
                tax_name = otu_tax[root_level]
            except KeyError:
                if otu_tax:
                    # Show the malformed record before failing.
                    print(otu_tax)
                    raise
                else:
                    # OTU without any taxonomy: nothing to add to the tree.
                    continue
            if tax_name not in self.root_nodes:
                root_node = Node(root_level, tax_name, self.samples, groups=self.groups)
                self.node_list[tax_name] = root_node
                self.root_nodes[tax_name] = root_node
            # NOTE(review): the parent of each level is the level visited
            # just before it, so this relies on otu_tax yielding its keys
            # from the root rank downwards -- confirm the mapping is ordered.
            for level in list(otu_tax.keys()):
                father_node = self.node_list[otu_tax[father_level]]
                father_level = level
                tax_name = otu_tax[level]
                if tax_name in self.node_list:
                    continue
                node = Node(level, tax_name, self.samples, groups=self.groups)
                self.node_list[tax_name] = node
                if tax_name not in father_node.sub_node:
                    father_node.sub_node[tax_name] = node

    def _otus_under(self, node):
        """Return the OTU ids whose taxonomy names ``node`` at its level."""
        return [
            otu for otu, otu_tax in self.otu_tax.items()
            if node.level in otu_tax and otu_tax[node.level] == node.name
        ]

    # TODO:
    #     simplify below functions
    #
    def get_node_profile(self, node):
        """Add per-sample counts of the node's OTUs to ``node.profile``.

        Lookups that raise KeyError are skipped. The same accumulation is
        then applied recursively to every entry of ``node.sub_node``.
        """
        # Which OTUs belong to the node does not depend on the sample.
        matching = self._otus_under(node)
        for sample in self.samples:
            for otu in matching:
                try:
                    node.profile[sample] += self.table.samples[sample][otu]
                except KeyError:
                    continue
        if not node.sub_node:
            return None
        for sub_node in node.sub_node.values():
            self.get_node_profile(sub_node)

    def get_profile(self):
        """Fill ``profile`` for every root node and all of its descendants."""
        for node in self.root_nodes.values():
            self.get_node_profile(node)

    def get_node_group_profile(self, node):
        """Add per-group counts of the node's OTUs to ``node.group_profile``.

        A sample listed in ``self.group`` but absent from the table is
        reported on stderr (one name per line) and removed from
        ``self.group``. An OTU absent from an existing sample is skipped.
        The same accumulation is then applied recursively to every entry of
        ``node.sub_node``.
        """
        matching = self._otus_under(node)
        missing = set()
        # Samples are only looked up when at least one OTU matches the node.
        if matching:
            for sample, group in self.group.items():
                try:
                    counts = self.table.samples[sample]
                except KeyError:
                    # Identify the missing sample by its key, not by parsing
                    # the exception message: a missing OTU raises KeyError
                    # too and must not be treated as a sample name.
                    sys.stderr.write('%s\n' % sample)
                    missing.add(sample)
                    continue
                for otu in matching:
                    try:
                        value = counts[otu]
                    except KeyError:
                        continue
                    node.group_profile[group] += value
        # Drop them only after the loop so the mapping is not modified
        # while it is being iterated.
        for sample in missing:
            del self.group[sample]
        if not node.sub_node:
            return None
        for sub_node in node.sub_node.values():
            self.get_node_group_profile(sub_node)
    parser = argparse.ArgumentParser(
        description='get otu profile uniform | v1.0 at 2015/09/29 by liangzb')
    parser.add_argument(
        '-i',
        '--infile',
        dest='infile',
        metavar='FILE',
        type=str,
        required=True,
        help="set the un-uniformed profile produced by biom convert")
    parser.add_argument('-o',
                        '--out_file',
                        dest='outfile',
                        metavar='FILE',
                        type=str,
                        required=True,
                        help="set the output uniform profile")

    args = parser.parse_args()
    params = vars(args)
    return params


if __name__ == '__main__':
    # Script entry point: read the options, make sure the output directory
    # exists, then parse, uniform and write the table.
    params = read_params(sys.argv)

    out_path = params['outfile']
    # NOTE(review): dirname is '' when out_path has no directory part;
    # confirm that util.mkdir accepts an empty path.
    mkdir(os.path.dirname(out_path))

    table = TableParser(params['infile'])
    table.parse_table()
    table.get_uniform()
    table.write_table(out_path)