Exemple #1
0
 def test_unweighted_unifrac_symmetry(self):
     # Swapping the order of the two samples must not change the distance.
     n_samples = len(self.table1)
     for row in range(n_samples):
         for col in range(n_samples):
             forward = unweighted_unifrac(
                 self.table1[row], self.table1[col], self.oids1, self.t1)
             backward = unweighted_unifrac(
                 self.table1[col], self.table1[row], self.oids1, self.t1)
             self.assertAlmostEqual(forward, backward)
Exemple #2
0
 def test_unweighted_unifrac_symmetry(self):
     # d(u, v) and d(v, u) are compared for every ordered pair of samples.
     samples = self.b1
     shared_args = (self.oids1, self.t1)
     for first in range(len(samples)):
         for second in range(len(samples)):
             u_vs_v = unweighted_unifrac(
                 samples[first], samples[second], *shared_args)
             v_vs_u = unweighted_unifrac(
                 samples[second], samples[first], *shared_args)
             self.assertAlmostEqual(u_vs_v, v_vs_u)
Exemple #3
0
 def test_unweighted_extra_tips(self):
     # Tips that are present in the tree but never observed in the counts
     # must leave the UniFrac value unchanged.
     sample_idx = range(len(self.b1))
     for a in sample_idx:
         for b in sample_idx:
             with_extra_tips = unweighted_unifrac(
                 self.b1[a], self.b1[b], self.oids1, self.t1_w_extra_tips)
             baseline = unweighted_unifrac(
                 self.b1[a], self.b1[b], self.oids1, self.t1)
             self.assertAlmostEqual(with_extra_tips, baseline)
Exemple #4
0
 def test_unweighted_extra_tips(self):
     # The same sample pair is scored against the tree with extra,
     # unobserved tips and against the plain tree; both must agree.
     counts = self.b1
     n_samples = len(counts)
     for row in range(n_samples):
         u_counts = counts[row]
         for col in range(n_samples):
             v_counts = counts[col]
             padded_tree_value = unweighted_unifrac(
                 u_counts, v_counts, self.oids1, self.t1_w_extra_tips)
             plain_tree_value = unweighted_unifrac(
                 u_counts, v_counts, self.oids1, self.t1)
             self.assertAlmostEqual(padded_tree_value, plain_tree_value)
Exemple #5
0
 def test_unweighted_unifrac_non_overlapping(self):
     # Communities that share nothing but the root node are maximally
     # distant (1.0): first a pair from the fixture table, then an explicit
     # presence/absence pair.
     fixture_distance = unweighted_unifrac(
         self.table1[4], self.table1[5], self.oids1, self.t1)
     self.assertAlmostEqual(fixture_distance, 1.0)

     literal_distance = unweighted_unifrac(
         [1, 1, 1, 0, 0], [0, 0, 0, 1, 1], self.oids1, self.t1)
     self.assertAlmostEqual(literal_distance, 1.0)
Exemple #6
0
 def test_unweighted_unifrac_non_overlapping(self):
     # Both sample pairs below only have the root node in common, so the
     # distance is expected to be exactly the maximum.
     maximum = 1.0

     from_fixture = unweighted_unifrac(
         self.b1[4], self.b1[5], self.oids1, self.t1)
     self.assertAlmostEqual(from_fixture, maximum)

     from_literals = unweighted_unifrac(
         [1, 1, 1, 0, 0], [0, 0, 0, 1, 1], self.oids1, self.t1)
     self.assertAlmostEqual(from_literals, maximum)
Exemple #7
0
 def test_unweighted_unifrac_zero_counts(self):
     # Each row: (u_counts, v_counts, otu_ids, expected distance).
     # Covers one empty sample, two empty samples, and no OTUs at all.
     scenarios = (
         ([1, 1, 1, 0, 0], [0, 0, 0, 0, 0], self.oids1, 1.0),
         ([0, 0, 0, 0, 0], [0, 0, 0, 0, 0], self.oids1, 0.0),
         ([], [], [], 0.0),
     )
     for u_counts, v_counts, ids, expected in scenarios:
         actual = unweighted_unifrac(u_counts, v_counts, ids, self.t1)
         self.assertAlmostEqual(actual, expected)
Exemple #8
0
    def test_unweighted_minimal_trees(self):
        # Expected values were worked out by hand for two degenerate trees.

        # A tree consisting of the root only (no tips, no observations).
        root_only = TreeNode.read(StringIO(u'root;'))
        self.assertEqual(unweighted_unifrac([], [], [], root_only), 0.0)

        # A tree with exactly two tips, only one of which is observed.
        two_tips = TreeNode.read(StringIO(u'(OTU1:0.25, OTU2:0.25)root;'))
        tip_names = ['OTU1', 'OTU2']
        distance = unweighted_unifrac([1, 0], [0, 0], tip_names, two_tips)
        self.assertEqual(distance, 1.0)
Exemple #9
0
 def test_unweighted_unifrac_zero_counts(self):
     # One sample empty: the distance is maximal.
     one_empty = unweighted_unifrac(
         [1, 1, 1, 0, 0], [0, 0, 0, 0, 0], self.oids1, self.t1)
     self.assertAlmostEqual(one_empty, 1.0)

     # Both samples empty: the distance is zero.
     both_empty = unweighted_unifrac(
         [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], self.oids1, self.t1)
     self.assertAlmostEqual(both_empty, 0.0)

     # No OTUs supplied at all: the distance is also zero.
     no_otus = unweighted_unifrac([], [], [], self.t1)
     self.assertAlmostEqual(no_otus, 0.0)
Exemple #10
0
def compute_beta_unifraq(df, m):
    """Build the symmetric pairwise UniFrac distance matrix between samples.

    Parameters
    ----------
    df : pandas.DataFrame-like
        Count table whose index holds the OTU ids and whose columns hold the
        samples (the values are transposed so that each row is one sample).
    m : str
        ``"unifraq"`` for unweighted UniFrac or ``"wunifraq"`` for weighted
        UniFrac.

    Returns
    -------
    numpy.ndarray
        Square ``(nsamples, nsamples)`` float matrix with zeros on the
        diagonal; samples are in the column order of ``df``.

    Raises
    ------
    ValueError
        If ``m`` is not one of the two supported metric names. Previously an
        unknown name caused an ``UnboundLocalError`` on the first sample
        pair, or silently produced an all-zero matrix when fewer than two
        samples were present.
    """
    # Reject an unsupported metric before doing any expensive work.
    if m not in ("unifraq", "wunifraq"):
        raise ValueError(
            "unknown UniFrac metric {!r}; expected 'unifraq' or "
            "'wunifraq'".format(m))

    from skbio.diversity.beta import unweighted_unifrac, weighted_unifrac

    # Select the distance function once instead of on every sample pair.
    metric = unweighted_unifrac if m == "unifraq" else weighted_unifrac

    # get the phylogenetic tree and drop OTUs that are not NCBI annotated
    tree, notfound = load_taxonomy_tree(list(df.index.values))
    df = df.drop(list(notfound))

    # The numpy matrix of counts, in which the rows are sample counts
    mt = df.values.T.astype(np.dtype('int64'))
    # OTU list, re-read after the drop so it lines up with the count columns
    otu_ids = list(df.index.values)

    # The beta diversity matrix
    nsamples = len(df.columns.values)
    bm = np.zeros((nsamples, nsamples))
    # Only the lower triangle is computed; each value is mirrored.
    for i in range(nsamples):
        for j in range(i):
            distance = metric(mt[i], mt[j], otu_ids, tree)
            bm[i, j] = distance
            bm[j, i] = distance
    return bm
Exemple #11
0
    def test_unweighted_otus_out_of_order(self):
        # The UniFrac API does not require observations to follow the tip
        # order of the tree, so exchanging the first and last OTU (ids and
        # count columns together) must give the same distances.
        swapped_ids = self.oids1[:]
        swapped_counts = self.b1.copy()

        swapped_ids[0], swapped_ids[-1] = swapped_ids[-1], swapped_ids[0]
        swapped_counts[:, [0, -1]] = swapped_counts[:, [-1, 0]]

        n_samples = len(self.b1)
        for row in range(n_samples):
            for col in range(n_samples):
                in_order = unweighted_unifrac(
                    self.b1[row], self.b1[col], self.oids1, self.t1)
                out_of_order = unweighted_unifrac(
                    swapped_counts[row], swapped_counts[col], swapped_ids,
                    self.t1)
                self.assertAlmostEqual(in_order, out_of_order)
Exemple #12
0
    def test_unweighted_otus_out_of_order(self):
        # Observation order is not tied to the tree's tip order: work on
        # copies of the ids and counts with the first and last OTU swapped.
        reordered_ids = self.oids1[:]
        reordered_ids[0], reordered_ids[-1] = (
            reordered_ids[-1], reordered_ids[0])

        reordered_b1 = self.b1.copy()
        reordered_b1[:, [0, -1]] = reordered_b1[:, [-1, 0]]

        sample_idx = range(len(self.b1))
        for a in sample_idx:
            for b in sample_idx:
                original = unweighted_unifrac(
                    self.b1[a], self.b1[b], self.oids1, self.t1)
                reordered = unweighted_unifrac(
                    reordered_b1[a], reordered_b1[b], reordered_ids, self.t1)
                self.assertAlmostEqual(original, reordered)
Exemple #13
0
 def test_unweighted_minimal_trees(self):
     # Smallest non-trivial case: a tree with two tips, one observed in the
     # first sample and none observed in the second.
     two_tip_tree = TreeNode.read(StringIO('(OTU1:0.25, OTU2:0.25)root;'))
     tip_names = ['OTU1', 'OTU2']
     distance = unweighted_unifrac([1, 0], [0, 0], tip_names, two_tip_tree)
     self.assertEqual(distance, 1.0)
Exemple #14
0
 def test_unweighted_unifrac_qiime_tiny_test(self):
     # Every ordered pair of columns in q_table is compared with the entry
     # for the same pair of sample ids in the stored distance matrix.
     relative_path = os.path.join('qiime-191-tt', 'unweighted_unifrac_dm.txt')
     reference = DistanceMatrix.read(get_data_path(relative_path, 'data'))

     sample_ids = self.q_table.columns
     for first_id in sample_ids:
         for second_id in sample_ids:
             computed = unweighted_unifrac(
                 self.q_table[first_id], self.q_table[second_id],
                 otu_ids=self.q_table.index, tree=self.q_tree)
             self.assertAlmostEqual(computed, reference[first_id, second_id])
Exemple #15
0
    def test_unweighted_root_not_observed(self):
        # Expected values come from QIIME 1.9.1 and from hand calculation.
        # They are written out as branch-length arithmetic on purpose, so
        # that it stays visible which branches count as unique and which
        # make up the total.

        # Only OTU1/OTU2 are seen: the root is not observed, but the branch
        # between (OTU1, OTU2) and the root is considered shared.
        left_clade = unweighted_unifrac(
            [1, 1, 0, 0], [1, 0, 0, 0], self.oids2, self.t2)
        left_expected = 0.2 / (0.1 + 0.2 + 0.3)  # 0.3333333333
        self.assertAlmostEqual(left_clade, left_expected)

        # Only OTU3/OTU4 are seen: the root is not observed, but the branch
        # between (OTU3, OTU4) and the root is considered shared.
        right_clade = unweighted_unifrac(
            [0, 0, 1, 1], [0, 0, 1, 0], self.oids2, self.t2)
        right_expected = 0.7 / (1.1 + 0.5 + 0.7)  # 0.3043478261
        self.assertAlmostEqual(right_clade, right_expected)
Exemple #16
0
    def test_unweighted_root_not_observed(self):
        # Reference numbers were obtained with QIIME 1.9.1 and by hand. Each
        # row holds (u_counts, v_counts, expected); the expected value is
        # kept as explicit branch-length arithmetic to document which
        # lengths go into the numerator and the denominator.
        checks = (
            # root node not observed, but the branch between (OTU1, OTU2)
            # and the root is considered shared
            ([1, 1, 0, 0], [1, 0, 0, 0],
             0.2 / (0.1 + 0.2 + 0.3)),  # 0.3333333333
            # root node not observed, but the branch between (OTU3, OTU4)
            # and the root is considered shared
            ([0, 0, 1, 1], [0, 0, 1, 0],
             0.7 / (1.1 + 0.5 + 0.7)),  # 0.3043478261
        )
        for u_counts, v_counts, expected in checks:
            actual = unweighted_unifrac(
                u_counts, v_counts, self.oids2, self.t2)
            self.assertAlmostEqual(actual, expected)
def get_unifrac(
    otu_file_1: pathlib.Path,
    otu_file_2: pathlib.Path,
    tree_file: pathlib.Path,
    weighted: bool,
    threshold: int,
):
    """Compute one UniFrac value per vector pair drawn from two OTU tables.

    Both tables are loaded with ``load_table`` and converted to dense
    dataframes, and the tree is read from ``tree_file``. ``get_vectors``
    supplies ``(u, v, otu_ids, col)`` tuples for the two tables and
    ``threshold``. Each pair is scored with normalized weighted UniFrac when
    ``weighted`` is true, otherwise with unweighted UniFrac; input
    validation is enabled in both cases.

    Returns a 3-tuple: a ``pd.Series`` of the values keyed by ``col``, then
    the first shape dimension of the first and of the second table.
    """
    first_table = load_table(str(otu_file_1)).to_dataframe(dense=True)
    second_table = load_table(str(otu_file_2)).to_dataframe(dense=True)
    phylogeny = TreeNode.read(str(tree_file))

    def _distance(u_counts, v_counts, ids):
        # Single place where the weighted/unweighted choice is made.
        if weighted:
            return weighted_unifrac(
                u_counts, v_counts, ids, phylogeny,
                normalized=True, validate=True
            )
        return unweighted_unifrac(
            u_counts, v_counts, ids, phylogeny, validate=True
        )

    values_by_column = {
        col: _distance(u, v, otu_ids)
        for u, v, otu_ids, col in get_vectors(
            first_table, second_table, threshold
        )
    }
    return (
        pd.Series(values_by_column),
        first_table.shape[0],
        second_table.shape[0],
    )
Exemple #18
0
from io import StringIO
from skbio import TreeNode
from skbio.diversity.beta import unweighted_unifrac

# Newick description of the example tree (eight tips, OTU1 .. OTU8).
newick = ('(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,'
          '(OTU4:0.75,(OTU5:0.5,((OTU6:0.33,OTU7:0.62):0.5'
          ',OTU8:0.5):0.5):0.5):1.25):0.0)root;')
tree = TreeNode.read(StringIO(newick))

# One count per OTU for each of the two samples, in the order of otu_ids.
otu_ids = ['OTU1', 'OTU2', 'OTU3', 'OTU4', 'OTU5', 'OTU6', 'OTU7', 'OTU8']
u_counts = [1, 0, 0, 4, 1, 2, 3, 0]
v_counts = [0, 1, 1, 6, 0, 1, 0, 0]

# Unweighted UniFrac between the two samples, printed to two decimals.
uu = unweighted_unifrac(u_counts, v_counts, otu_ids, tree)
print(round(uu, 2))

from ete3 import PhyloTree

# Build a small example tree from a Newick string and print it.
t = PhyloTree('((H,I), A, (B,(C,D)))root;', format=1)
print(t)
# Presumably looks up the node named "D" in the tree -- confirm against the
# ete3 documentation for the `&` operator.
D = t & "D"
# Get the path from D to the root
node = D
path = []
# Walk upward through the .up links; the loop stops at the node whose .up is
# falsy, so that last node is never appended to the path.
while node.up:
    path.append(node)
    node = node.up
# Subtract the D node itself from the total number of visited nodes
print("There are", len(path) - 1, "nodes between D and the root")
A = t & "A"
# Get the path from A to the root
node = A
Exemple #19
0
 def test_unweighted_unifrac(self):
     # Expected results were derived from QIIME 1.9.1, an implementation
     # completely independent of skbio's initial unweighted UniFrac.
     # Each row is (index of first sample, index of second sample, expected).
     qiime_reference = (
         # sample A versus all
         (0, 1, 0.238095238095),
         (0, 2, 0.52),
         (0, 3, 0.52),
         (0, 4, 0.545454545455),
         (0, 5, 0.619047619048),
         # sample B versus remaining
         (1, 2, 0.347826086957),
         (1, 3, 0.347826086957),
         (1, 4, 0.68),
         (1, 5, 0.421052631579),
         # sample C versus remaining
         (2, 3, 0.0),
         (2, 4, 0.68),
         (2, 5, 0.421052631579),
         # sample D versus remaining
         (3, 4, 0.68),
         (3, 5, 0.421052631579),
         # sample E versus remaining
         (4, 5, 1.0),
     )
     for first, second, expected in qiime_reference:
         actual = unweighted_unifrac(
             self.b1[first], self.b1[second], self.oids1, self.t1)
         self.assertAlmostEqual(actual, expected)
Exemple #20
0
 def test_unweighted_unifrac_identity(self):
     # The distance from any sample to itself is zero.
     n_samples = len(self.b1)
     for idx in range(n_samples):
         self_distance = unweighted_unifrac(
             self.b1[idx], self.b1[idx], self.oids1, self.t1)
         self.assertAlmostEqual(self_distance, 0.0)
Exemple #21
0
 def test_unweighted_unifrac(self):
     # Reference distances come from QIIME 1.9.1, which is a completely
     # different implementation from skbio's initial unweighted UniFrac.
     # Keys are the row indices of the first sample; each value lists
     # (row index of the second sample, expected distance).
     expected_by_sample = (
         # sample A versus all
         (0, ((1, 0.238095238095), (2, 0.52), (3, 0.52),
              (4, 0.545454545455), (5, 0.619047619048))),
         # sample B versus remaining
         (1, ((2, 0.347826086957), (3, 0.347826086957), (4, 0.68),
              (5, 0.421052631579))),
         # sample C versus remaining
         (2, ((3, 0.0), (4, 0.68), (5, 0.421052631579))),
         # sample D versus remaining
         (3, ((4, 0.68), (5, 0.421052631579))),
         # sample E versus remaining
         (4, ((5, 1.0),)),
     )
     for first, partners in expected_by_sample:
         for second, expected in partners:
             actual = unweighted_unifrac(
                 self.table1[first], self.table1[second], self.oids1,
                 self.t1)
             self.assertAlmostEqual(actual, expected)
Exemple #22
0
 def test_unweighted_unifrac_kwargs(self):
     # An unrecognised keyword argument must be accepted via **kwargs;
     # the sample is compared with itself, so the result is zero.
     extra_kwargs = {'not_a_known_parameter': 42}
     distance = unweighted_unifrac(
         self.table1[0], self.table1[0], self.oids1, self.t1, **extra_kwargs)
     self.assertAlmostEqual(distance, 0.0)
Exemple #23
0
 def test_unweighted_unifrac_identity(self):
     # Comparing a sample with itself must always yield a distance of zero.
     zero = 0.0
     for position in range(len(self.table1)):
         same_sample = unweighted_unifrac(
             self.table1[position], self.table1[position], self.oids1,
             self.t1)
         self.assertAlmostEqual(same_sample, zero)