def test_validate_otu_ids_and_tree(self):
    """Valid counts/otu_ids/tree combinations must validate cleanly.

    Each case below is expected to make ``_validate_otu_ids_and_tree``
    return ``None`` (i.e. no exception and no unexpected return value).
    """
    newick = (u'(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:'
              u'0.75,OTU5:0.75):1.25):0.0)root;')
    all_ids = ['OTU1', 'OTU2', 'OTU3', 'OTU4', 'OTU5']
    # (label, newick string, counts, otu_ids)
    cases = [
        ('basic valid input', newick, [1, 1, 1], ['OTU1', 'OTU2', 'OTU3']),
        ('all tips observed', newick, [1, 1, 1, 1, 1], list(all_ids)),
        ('no tips observed', newick, [], []),
        ('all counts zero', newick, [0, 0, 0, 0, 0], list(all_ids)),
        ('single node tree', u'root;', [], []),
    ]
    for label, tree_str, counts, otu_ids in cases:
        # Parse a fresh tree per case so cases cannot influence one another.
        t = TreeNode.read(StringIO(tree_str))
        # assertIsNone reports the offending value on failure, and the
        # label identifies which case failed.
        self.assertIsNone(
            _validate_otu_ids_and_tree(counts, otu_ids, t), msg=label)
def test_validate_otu_ids_and_tree(self):
    """Valid counts/otu_ids/tree combinations must validate cleanly.

    Each case below is expected to make ``_validate_otu_ids_and_tree``
    return ``None`` (i.e. no exception and no unexpected return value).
    """
    newick = (u'(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:'
              u'0.75,OTU5:0.75):1.25):0.0)root;')
    all_ids = ['OTU1', 'OTU2', 'OTU3', 'OTU4', 'OTU5']
    # (label, newick string, counts, otu_ids)
    cases = [
        ('basic valid input', newick, [1, 1, 1], ['OTU1', 'OTU2', 'OTU3']),
        ('all tips observed', newick, [1, 1, 1, 1, 1], list(all_ids)),
        ('no tips observed', newick, [], []),
        ('all counts zero', newick, [0, 0, 0, 0, 0], list(all_ids)),
        ('single node tree', u'root;', [], []),
    ]
    for label, tree_str, counts, otu_ids in cases:
        # Parse a fresh tree per case so cases cannot influence one another.
        t = TreeNode.read(StringIO(tree_str))
        # assertIsNone reports the offending value on failure, and the
        # label identifies which case failed.
        self.assertIsNone(
            _validate_otu_ids_and_tree(counts, otu_ids, t), msg=label)
def _validate(u_counts, v_counts, otu_ids, tree):
    """Run input validation for a pair of count vectors and their tree.

    The two count vectors are checked together by
    ``_validate_counts_vectors`` (called with ``suppress_cast=True``), and
    then ``otu_ids`` and ``tree`` are checked against ``u_counts`` by
    ``_validate_otu_ids_and_tree``. The helpers' return values are
    discarded, so this function returns ``None``.
    """
    _validate_counts_vectors(u_counts, v_counts, suppress_cast=True)
    # NOTE(review): only u_counts is handed to the otu_ids/tree check;
    # presumably the call above already ties v_counts to u_counts - confirm.
    _validate_otu_ids_and_tree(u_counts, otu_ids, tree)
def _validate(u_counts, v_counts, otu_ids, tree):
    """Run input validation for a pair of count vectors and their tree.

    The two count vectors are checked together by
    ``_validate_counts_vectors`` (called with ``suppress_cast=True``), and
    then ``otu_ids`` and ``tree`` are checked against ``u_counts`` by
    ``_validate_otu_ids_and_tree``. The helpers' return values are
    discarded, so this function returns ``None``.
    """
    _validate_counts_vectors(u_counts, v_counts, suppress_cast=True)
    # NOTE(review): only u_counts is handed to the otu_ids/tree check;
    # presumably the call above already ties v_counts to u_counts - confirm.
    _validate_otu_ids_and_tree(u_counts, otu_ids, tree)
def faith_pd(counts, otu_ids, tree, validate=True):
    """ Compute Faith's phylogenetic diversity metric (PD)

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    otu_ids : list, np.array
        Vector of OTU ids corresponding to tip names in ``tree``. Must be the
        same length as ``counts``.
    tree : skbio.TreeNode
        Tree relating the OTUs in otu_ids. The set of tip names in the tree
        can be a superset of ``otu_ids``, but not a subset.
    validate : bool, optional
        If `False`, validation of the input won't be performed. This step can
        be slow, so if validation is run elsewhere it can be disabled here.
        However, invalid input data can lead to invalid results, so this step
        should not be bypassed altogether.

    Returns
    -------
    float
        The phylogenetic diversity (PD) of the samples. This is ``0.0`` when
        no branch lengths are summed.

    Raises
    ------
    ValueError
        If ``counts`` and ``otu_ids`` are not equal in length.
    MissingNodeError
        If an OTU id is provided that does not correspond to a tip in the
        tree.

    Notes
    -----
    Faith's phylogenetic diversity, often referred to as PD, was originally
    described in [1]_.

    This implementation differs from that in PyCogent (and therefore QIIME
    versions less than 2.0.0) by imposing a few additional restrictions on the
    inputs. First, the input tree must be rooted. In PyCogent, if an unrooted
    tree was provided that had a single trifurcating node (a newick convention
    for unrooted trees) that node was considered the root of the tree. Next,
    all OTU IDs must be tips in the tree. PyCogent would silently ignore OTU
    IDs that were not present in the tree. To reproduce Faith PD results from
    PyCogent with scikit-bio, ensure that your PyCogent Faith PD calculations
    are performed on a rooted tree and that all OTU IDs are present in the
    tree.

    References
    ----------
    .. [1] Faith, D. P. Conservation evaluation and phylogenetic diversity.
       Biol. Conserv. (1992).

    """
    if validate:
        counts = _validate_counts_vector(counts)
        _validate_otu_ids_and_tree(counts, otu_ids, tree)

    # Only OTUs with a count of at least one are treated as observed.
    observed_otus = {o: c for o, c in zip(otu_ids, counts) if c >= 1}
    observed_nodes = tree.observed_node_counts(observed_otus)

    # Nodes without a branch length are skipped. Starting the sum at 0.0
    # keeps the documented float return type even when nothing is summed
    # (a bare ``sum`` over an empty iterable would return the int 0).
    return sum(
        (o.length for o in observed_nodes if o.length is not None), 0.0)