Exemple #1
0
def load_json(fp):
    """Build a PhyloTree hierarchy from a uBiome bacteria-counts JSON source.

    ``fp`` is handed to ``clean_json`` and the result is parsed with
    ``json.loads``.  Every entry under ``'ubiome_bacteriacounts'`` is passed
    through ``normalise_row`` and turned into one ``PhyloTree`` node that
    carries all of the row's keys as features.

    The node stored under the smallest ``taxon`` key is treated as the root.
    Its ``count_norm`` is the denominator for each node's ``count_pct``
    feature, and it receives an ``alpha`` attribute computed by
    ``alpha_function`` over all normalised counts.

    Returns the root node.
    """
    payload = json.loads(clean_json(fp))
    nodes_by_taxon = {}
    normalised_counts = []

    for record in payload['ubiome_bacteriacounts']:
        # presumably normalise_row fills in 'count_norm' in place -- it is
        # read from the row immediately afterwards; confirm against helper.
        normalise_row(record)
        normalised_counts.append(record['count_norm'])

        node = PhyloTree()
        node.name = record['tax_name']
        node.add_features(**record)
        nodes_by_taxon[record['taxon']] = node

    # The entry with the smallest taxon key is used as the root of the tree.
    root = nodes_by_taxon[min(nodes_by_taxon)]
    root_count = root.count_norm
    root.alpha = alpha_function(normalised_counts)

    for node in nodes_by_taxon.values():
        percentage = float(node.count_norm) / root_count * 100
        node.add_feature('count_pct', percentage)

        # ``parent`` is whatever the row supplied under that key; nodes whose
        # parent is not among the loaded taxa are left unattached.
        parent_node = nodes_by_taxon.get(node.parent)
        if parent_node is None:
            continue
        parent_node.add_child(node)

    # NOTE(review): the message labels len(root) as "depth"; confirm that
    # this is what PhyloTree.__len__ actually reports.
    summary = 'loaded {} into tree depth {} diversity {:.2f}'.format(
        len(nodes_by_taxon), len(root), root.alpha)
    print(summary)
    return root
Exemple #2
0
    def build_tree(self, sample, rank_limit='None'):
        """Assemble a PhyloTree from the taxonomy paths of a sample's taxids.

        For every taxid returned by ``self.get_all_tax_ids(sample)`` whose
        rank position (``self.rank_position``) does not exceed that of
        ``rank_limit``, one node is created per non-empty id in the taxid's
        ``self.get_taxpath`` result.  Nodes are shared between paths, and
        consecutive nodes of a path are linked as parent and child.

        ``rank_limit`` is used as a key into ``self.rank_position``; the
        default is the string ``'None'``, not the ``None`` object.

        Returns a synthetic node named ``'root'`` whose children are the
        nodes of rank ``'superkingdom'``.  If that root has exactly one
        child, the child is detached and returned instead.
        """
        # Per the original note: taxids of the sample, or all taxids when
        # sample is None.
        sample_taxids = set(self.get_all_tax_ids(sample))

        lineage_by_taxid = {}
        nodes_by_id = {}
        superkingdoms = []

        for taxid in sample_taxids:
            lineage = lineage_by_taxid[taxid] = []

            path_ids = self.get_taxpath(taxid)
            taxid_rank = self.get_rank(taxid)

            # Taxa ranked beyond the limit keep an empty lineage.
            if self.rank_position[taxid_rank] > self.rank_position[rank_limit]:
                continue

            for node_id in path_ids:
                if node_id == '':
                    continue

                node = nodes_by_id.get(node_id)
                if node is None:
                    # First time this id is seen: create and annotate it.
                    node = PhyloTree()
                    nodes_by_id[node_id] = node
                    node.name = str(node_id)
                    node.taxid = node_id
                    node_rank = self.get_rank(node_id)
                    node.add_feature("rank", node_rank)
                    node.add_feature("sci_name", self.get_name(node_id))

                    if node_rank == 'superkingdom':
                        superkingdoms.append(node)

                lineage.append(node)

        # Link each node to its predecessor in the path, skipping links that
        # were already made through another taxid's path.
        for lineage in lineage_by_taxid.values():
            for parent, child in zip(lineage, lineage[1:]):
                if parent and child not in parent.children:
                    parent.add_child(child)

        root = PhyloTree()
        root.name = 'root'
        root.taxid = '0'
        root.add_feature("rank", "root")
        root.add_feature("sci_name", "root")

        for superkingdom in superkingdoms:
            root.add_child(superkingdom)

        # A lone top-level child replaces the synthetic root.
        if len(root.children) == 1:
            return root.children[0].detach()

        return root