Esempio n. 1
0
 def test_correlation_linkage_1(self):
     """correlation_linkage yields the expected tree for a small table.

     The serialized result is compared against a reference Newick string:
     topology and node labels must match exactly, while branch lengths
     are compared numerically so that differences in the trailing float
     digits do not cause spurious failures.
     """
     import re  # local import: only needed for the tolerant comparison

     table = pd.DataFrame([[1, 1, 0, 0, 0], [0, 1, 1, 0, 0],
                           [0, 0, 1, 1, 0], [0, 0, 0, 1, 1]],
                          columns=['s1', 's2', 's3', 's4', 's5'],
                          index=['o1', 'o2', 'o3', 'o4']).T
     exp_str = ('((o1:0.574990173931,o2:0.574990173931)y1:0.773481312844,'
                '(o3:0.574990173931,o4:0.574990173931)y2:0.773481312844)'
                'y0;\n')
     # NOTE(review): the 0.1 offset presumably keeps every entry nonzero
     # for correlation_linkage -- confirm against its documentation.
     res_str = str(correlation_linkage(table + 0.1))

     # Everything following a ':' in the serialized tree is a branch length.
     branch_length = re.compile(r':([0-9.eE+-]+)')

     # With the lengths blanked out, what remains (nesting, labels, child
     # order) has to be identical.
     self.assertEqual(branch_length.sub(':', exp_str),
                      branch_length.sub(':', res_str))

     # The check above guarantees both strings carry the same number of
     # lengths, so pairing them positionally is safe.
     for exp_len, res_len in zip(branch_length.findall(exp_str),
                                 branch_length.findall(res_str)):
         self.assertAlmostEqual(float(exp_len), float(res_len))
Esempio n. 2
0
 def test_correlation_linkage_2(self):
     """correlation_linkage yields the expected tree for six features.

     The serialized result is compared against a reference Newick string:
     topology and node labels must match exactly, while branch lengths
     are compared numerically so that differences in the trailing float
     digits do not cause spurious failures.
     """
     import re  # local import: only needed for the tolerant comparison

     t = pd.DataFrame([[1, 1, 2, 3, 1, 4], [2, 2, 0.1, 4, 1, .1],
                       [3, 3.1, 2, 3, 2, 2], [4.1, 4, 0.2, 1, 1, 2.5]],
                      index=['S1', 'S2', 'S3', 'S4'],
                      columns=['F1', 'F2', 'F3', 'F4', 'F5', 'F6'])
     exp_str = ('((F4:0.228723591874,(F5:0.074748541601,'
                '(F1:0.00010428164962,F2:0.00010428164962)'
                'y4:0.0746442599513)y3:0.153975050273)'
                'y1:0.70266138894,(F3:0.266841737789,F6:0.266841737789)'
                'y2:0.664543243026)y0;\n')
     res_str = str(correlation_linkage(t))

     # Everything following a ':' in the serialized tree is a branch length.
     branch_length = re.compile(r':([0-9.eE+-]+)')

     # With the lengths blanked out, what remains (nesting, labels, child
     # order) has to be identical.
     self.assertEqual(branch_length.sub(':', exp_str),
                      branch_length.sub(':', res_str))

     # The check above guarantees both strings carry the same number of
     # lengths, so pairing them positionally is safe.
     for exp_len, res_len in zip(branch_length.findall(exp_str),
                                 branch_length.findall(res_str)):
         self.assertAlmostEqual(float(exp_len), float(res_len))
Esempio n. 3
0
 def test_correlation_linkage_1(self):
     """The tree built by correlation_linkage has the expected shape.

     A reference tree is parsed from a Newick string and its ASCII
     rendering is compared with that of the computed tree.
     """
     rows = [[1, 1, 0, 0, 0],
             [0, 1, 1, 0, 0],
             [0, 0, 1, 1, 0],
             [0, 0, 0, 1, 1]]
     frame = pd.DataFrame(rows,
                          index=['o1', 'o2', 'o3', 'o4'],
                          columns=['s1', 's2', 's3', 's4', 's5'])
     table = frame.T

     # Reference Newick, assembled clade by clade.
     left_clade = '(o1:0.574990173931,o2:0.574990173931)y1:0.773481312844'
     right_clade = '(o3:0.574990173931,o4:0.574990173931)y2:0.773481312844'
     exp_str = '(' + left_clade + ',' + right_clade + ')y0;\n'

     expected = TreeNode.read([exp_str])
     # NOTE(review): the 0.1 offset presumably keeps every entry nonzero
     # for correlation_linkage -- confirm against its documentation.
     observed = correlation_linkage(table + 0.1)

     # Compare the ASCII renderings rather than the serialized strings:
     # per the original author's note, only topology is checked because
     # matching floating point branch lengths is still tricky.
     expected_art = expected.ascii_art()
     observed_art = observed.ascii_art()
     self.assertEqual(expected_art, observed_art)
Esempio n. 4
0
 def test_correlation_linkage_2(self):
     """The six-feature tree from correlation_linkage has the expected shape.

     A reference tree is parsed from a Newick string and its ASCII
     rendering is compared with that of the computed tree.
     """
     samples = ['S1', 'S2', 'S3', 'S4']
     features = ['F1', 'F2', 'F3', 'F4', 'F5', 'F6']
     values = [
         [1, 1, 2, 3, 1, 4],
         [2, 2, 0.1, 4, 1, 0.1],
         [3, 3.1, 2, 3, 2, 2],
         [4.1, 4, 0.2, 1, 1, 2.5],
     ]
     t = pd.DataFrame(values, index=samples, columns=features)

     # Reference Newick, assembled from the innermost clade outwards.
     y4 = '(F1:0.00010428164962,F2:0.00010428164962)y4:0.0746442599513'
     y3 = '(F5:0.074748541601,' + y4 + ')y3:0.153975050273'
     y1 = '(F4:0.228723591874,' + y3 + ')y1:0.70266138894'
     y2 = '(F3:0.266841737789,F6:0.266841737789)y2:0.664543243026'
     exp_str = '(' + y1 + ',' + y2 + ')y0;\n'

     expected = TreeNode.read([exp_str])
     observed = correlation_linkage(t)

     # ASCII renderings are compared instead of the serialized strings.
     expected_art = expected.ascii_art()
     observed_art = observed.ascii_art()
     self.assertEqual(expected_art, observed_art)
Esempio n. 5
0
def correlation_clustering(table: pd.DataFrame) -> skbio.TreeNode:
    """Cluster the features of a table into a tree using correlation.

    This is a thin wrapper: the returned tree is whatever
    ``correlation_linkage`` produces for ``table``.

    Parameters
    ----------
    table : pd.DataFrame
        Contingency table with samples as rows and features as columns.
        Every value must be strictly nonzero.

    Returns
    -------
    skbio.TreeNode
        Tree describing how the features are partitioned with respect to
        correlation.
    """
    return correlation_linkage(table)
Esempio n. 6
0
 def test_correlation_linkage_1(self):
     """Topology check for correlation_linkage on a small shifted table.

     The computed tree's ASCII rendering must equal that of a reference
     tree parsed from a Newick string.
     """
     data = [[1, 1, 0, 0, 0],
             [0, 1, 1, 0, 0],
             [0, 0, 1, 1, 0],
             [0, 0, 0, 1, 1]]
     table = pd.DataFrame(
         data,
         index=['o1', 'o2', 'o3', 'o4'],
         columns=['s1', 's2', 's3', 's4', 's5'],
     ).transpose()

     exp_str = ''.join((
         '((o1:0.574990173931,o2:0.574990173931)y1:0.773481312844,',
         '(o3:0.574990173931,o4:0.574990173931)y2:0.773481312844)',
         'y0;\n',
     ))
     exp_tree = TreeNode.read([exp_str])

     # NOTE(review): the 0.1 offset presumably keeps every entry nonzero
     # for correlation_linkage -- confirm against its documentation.
     shifted = table + 0.1
     res_tree = correlation_linkage(shifted)

     # Per the original author's note, only the topology is checked here
     # because comparing floating point branch lengths is still tricky;
     # hence the ASCII renderings are compared, not the Newick strings.
     self.assertEqual(exp_tree.ascii_art(), res_tree.ascii_art())