def test_gradient_sort_error(self):
    # gradient_sort is expected to raise ValueError when the ordering
    # handed to it holds string values instead of numbers.
    newick = [u'((a,b)c, ((g,h)e,f)d)r;']
    ordering = pd.Series(
        {'f': 'x', 'g': 'y', 'h': 'z', 'a': 'u', 'b': 'dz'})
    tree = TreeNode.read(newick)
    self.assertRaises(ValueError, gradient_sort, tree, ordering)
def gradient_clustering(table: pd.DataFrame,
                        gradient: NumericMetadataColumn,
                        weighted: bool = True) -> skbio.TreeNode:
    """ Builds a tree for features based on a gradient.

    Parameters
    ----------
    table : pd.DataFrame
       Contingency table where rows are samples and columns are features.
    gradient : qiime2.NumericMetadataColumn
       Continuous vector of measurements corresponding to samples.
    weighted : bool
       Specifies if abundance or presence/absence information
       should be used to perform the clustering.

    Returns
    -------
    skbio.TreeNode
       Represents the partitioning of features with respect to the gradient.
    """
    c = gradient.to_series()
    if not weighted:
        # Reduce abundances to presence/absence (1.0 / 0.0).  The builtin
        # ``float`` is used because the ``np.float`` alias was deprecated
        # in NumPy 1.20 and removed in NumPy 1.24.
        table = (table > 0).astype(float)
    # NOTE(review): `match` presumably restricts the table and the gradient
    # to the samples they share -- confirm against its definition.
    table, c = match(table, c)
    t = gradient_linkage(table, c, method='average')
    # One estimate per feature (table column); sorted in ascending order
    # and used as the ordering handed to `gradient_sort`.
    mean_g = mean_niche_estimator(table, c)
    mean_g = pd.Series(mean_g, index=table.columns)
    mean_g = mean_g.sort_values()
    t = gradient_sort(t, mean_g)
    return t
def test_gradient_sort_descending(self):
    # With ascending=False the tips should come back ordered from the
    # largest to the smallest value of the pre-set ordering.
    ordering = pd.Series({'f': 3, 'g': 1, 'h': 2, 'a': 4, 'b': 5})
    tree = TreeNode.read([u'((a,b)c, ((g,h)e,f)d)r;'])
    sorted_tree = gradient_sort(tree, ordering, ascending=False)
    self.assertEqual('((b,a)c,(f,(h,g)e)d)r;\n', str(sorted_tree))
def test_gradient_sort(self):
    # By default the tips should come back ordered from the smallest
    # to the largest value of the pre-set ordering.
    ordering = pd.Series({'f': 3, 'g': 1, 'h': 2, 'a': 4, 'b': 5})
    tree = TreeNode.read([u'((a,b)c, ((g,h)e,f)d)r;'])
    sorted_tree = gradient_sort(tree, ordering)
    self.assertEqual('(((g,h)e,f)d,(a,b)c)r;\n', str(sorted_tree))
def gradient_clustering(table: pd.DataFrame,
                        gradient: NumericMetadataColumn,
                        ignore_missing_samples: bool = False,
                        weighted: bool = True) -> skbio.TreeNode:
    """ Builds a tree for features based on a gradient.

    Parameters
    ----------
    table : pd.DataFrame
       Contingency table where rows are samples and columns are features.
    gradient : qiime2.NumericMetadataColumn
       Continuous vector of measurements corresponding to samples.
    ignore_missing_samples: bool
       If False (the default), a KeyError is raised when the table
       contains samples that are absent from the gradient metadata.
       If True, that check is skipped.
    weighted : bool
       Specifies if abundance or presence/absence information
       should be used to perform the clustering.

    Returns
    -------
    skbio.TreeNode
       Represents the partitioning of features with respect to the gradient.

    Raises
    ------
    KeyError
       If `ignore_missing_samples` is False and at least one sample in
       the table index is not in the gradient index.
    """
    gradient_values = gradient.to_series()

    if not ignore_missing_samples:
        table_samples = set(table.index)
        gradient_samples = set(gradient_values.index)
        unmatched = table_samples - gradient_samples
        if unmatched:
            # Sorted so the reported sample list is deterministic.
            offending = ', '.join(sorted(map(str, unmatched)))
            raise KeyError("There are samples present in the table not "
                           "present in the gradient metadata column. Override "
                           "this error by using the `ignore_missing_samples` "
                           "argument. Offending samples: %r" % offending)

    if not weighted:
        # Reduce abundances to presence/absence (1.0 / 0.0).
        table = (table > 0).astype(float)

    table, gradient_values = match(table, gradient_values)
    tree = gradient_linkage(table, gradient_values, method='average')

    # One estimate per feature (table column), in ascending order, used
    # as the ordering handed to `gradient_sort`.
    niche_means = pd.Series(mean_niche_estimator(table, gradient_values),
                            index=table.columns).sort_values()
    return gradient_sort(tree, niche_means)