Esempio n. 1
0
 def test_gradient_sort_error(self):
     # gradient_sort is expected to reject this gradient with a
     # ValueError.  Note that the gradient values are strings rather
     # than numbers (presumably the reason for the rejection --
     # confirm against gradient_sort).
     newick = [u'((a,b)c, ((g,h)e,f)d)r;']
     unsorted_tree = TreeNode.read(newick)
     labels = pd.Series({'f': 'x', 'g': 'y', 'h': 'z', 'a': 'u', 'b': 'dz'})
     self.assertRaises(ValueError, gradient_sort, unsorted_tree, labels)
Esempio n. 2
0
 def test_gradient_sort_error(self):
     # gradient_sort is expected to reject this gradient with a
     # ValueError.  Note that the gradient values are strings rather
     # than numbers (presumably the reason for the rejection --
     # confirm against gradient_sort).
     newick = [u'((a,b)c, ((g,h)e,f)d)r;']
     unsorted_tree = TreeNode.read(newick)
     labels = pd.Series({'f': 'x', 'g': 'y', 'h': 'z', 'a': 'u', 'b': 'dz'})
     self.assertRaises(ValueError, gradient_sort, unsorted_tree, labels)
Esempio n. 3
0
def gradient_clustering(table: pd.DataFrame,
                        gradient: NumericMetadataColumn,
                        weighted: bool = True) -> skbio.TreeNode:
    """ Builds a tree for features based on a gradient.

    Parameters
    ----------
    table : pd.DataFrame
       Contingency table where rows are samples and columns are features.
    gradient : qiime2.NumericMetadataColumn
       Continuous vector of measurements corresponding to samples.
    weighted : bool
       Specifies if abundance or presence/absence information
       should be used to perform the clustering.

    Returns
    -------
    skbio.TreeNode
       Represents the partitioning of features with respect to the gradient.
    """
    c = gradient.to_series()
    if not weighted:
        # Collapse abundances to presence/absence (1.0 / 0.0).  The builtin
        # ``float`` is used because the ``np.float`` alias was deprecated in
        # NumPy 1.20 and removed in NumPy 1.24, where it raises
        # AttributeError.  ``np.float`` was only ever an alias of ``float``,
        # so the resulting dtype is unchanged.
        table = (table > 0).astype(float)
    # NOTE(review): `match` presumably restricts the table and the gradient
    # to the samples they share -- confirm against its definition.
    table, c = match(table, c)
    t = gradient_linkage(table, c, method='average')
    # One niche estimate per feature (column), sorted so that the tree
    # can be reordered to follow the gradient.
    mean_g = mean_niche_estimator(table, c)
    mean_g = pd.Series(mean_g, index=table.columns)
    mean_g = mean_g.sort_values()
    t = gradient_sort(t, mean_g)
    return t
Esempio n. 4
0
 def test_gradient_sort_descending(self):
     # With ascending=False the tips should be reordered so that they
     # follow the supplied gradient from its largest value down to its
     # smallest.
     gradient = pd.Series({'f': 3, 'g': 1, 'h': 2, 'a': 4, 'b': 5})
     unsorted_tree = TreeNode.read([u'((a,b)c, ((g,h)e,f)d)r;'])
     sorted_tree = gradient_sort(unsorted_tree, gradient, ascending=False)
     expected = '((b,a)c,(f,(h,g)e)d)r;\n'
     self.assertEqual(expected, str(sorted_tree))
Esempio n. 5
0
 def test_gradient_sort(self):
     # By default the tips should be reordered so that they follow the
     # supplied gradient from its smallest value up to its largest.
     gradient = pd.Series({'f': 3, 'g': 1, 'h': 2, 'a': 4, 'b': 5})
     unsorted_tree = TreeNode.read([u'((a,b)c, ((g,h)e,f)d)r;'])
     sorted_tree = gradient_sort(unsorted_tree, gradient)
     expected = '(((g,h)e,f)d,(a,b)c)r;\n'
     self.assertEqual(expected, str(sorted_tree))
Esempio n. 6
0
 def test_gradient_sort_descending(self):
     # With ascending=False the tips should be reordered so that they
     # follow the supplied gradient from its largest value down to its
     # smallest.
     gradient = pd.Series({'f': 3, 'g': 1, 'h': 2, 'a': 4, 'b': 5})
     unsorted_tree = TreeNode.read([u'((a,b)c, ((g,h)e,f)d)r;'])
     sorted_tree = gradient_sort(unsorted_tree, gradient, ascending=False)
     expected = '((b,a)c,(f,(h,g)e)d)r;\n'
     self.assertEqual(expected, str(sorted_tree))
Esempio n. 7
0
 def test_gradient_sort(self):
     # By default the tips should be reordered so that they follow the
     # supplied gradient from its smallest value up to its largest.
     gradient = pd.Series({'f': 3, 'g': 1, 'h': 2, 'a': 4, 'b': 5})
     unsorted_tree = TreeNode.read([u'((a,b)c, ((g,h)e,f)d)r;'])
     sorted_tree = gradient_sort(unsorted_tree, gradient)
     expected = '(((g,h)e,f)d,(a,b)c)r;\n'
     self.assertEqual(expected, str(sorted_tree))
Esempio n. 8
0
def gradient_clustering(table: pd.DataFrame,
                        gradient: NumericMetadataColumn,
                        ignore_missing_samples: bool = False,
                        weighted: bool = True) -> skbio.TreeNode:
    """ Clusters features into a tree according to a sample gradient.

    Parameters
    ----------
    table : pd.DataFrame
       Contingency table where rows are samples and columns are features.
    gradient : qiime2.NumericMetadataColumn
       Continuous vector of measurements corresponding to samples.
    ignore_missing_samples: bool
        If False (the default), raise an error when the table contains
        samples that are absent from the gradient metadata.  If True,
        skip that check.
    weighted : bool
       Specifies if abundance or presence/absence information
       should be used to perform the clustering.

    Returns
    -------
    skbio.TreeNode
       Represents the partitioning of features with respect to the gradient.

    Raises
    ------
    KeyError
        If `ignore_missing_samples` is False and at least one sample in
        the table has no entry in the gradient.
    """
    gradient_values = gradient.to_series()

    if not ignore_missing_samples:
        unmatched = set(table.index) - set(gradient_values.index)
        if unmatched:
            # Sorted so the reported sample list is deterministic.
            offenders = ', '.join(sorted(map(str, unmatched)))
            raise KeyError("There are samples present in the table not "
                           "present in the gradient metadata column. Override "
                           "this error by using the `ignore_missing_samples` "
                           "argument. Offending samples: %r" % offenders)

    if not weighted:
        # Reduce abundances to presence/absence indicators (1.0 / 0.0).
        present = table > 0
        table = present.astype(float)

    # NOTE(review): `match` presumably restricts both inputs to the samples
    # they have in common -- confirm against its definition.
    table, gradient_values = match(table, gradient_values)
    tree = gradient_linkage(table, gradient_values, method='average')

    # One niche estimate per feature (column), ordered so the tree can be
    # rearranged to follow the gradient.
    niche_means = pd.Series(mean_niche_estimator(table, gradient_values),
                            index=table.columns).sort_values()
    return gradient_sort(tree, niche_means)