def test_vectorize_counts_and_tree(self):
    """Check the count matrix returned by ``_vectorize_counts_and_tree``.

    Three count vectors over the tips ``a`` and ``b`` are vectorized
    against the tree ``((a:1, b:2)c:3)root;`` and the first returned
    value is compared with a hand-written expectation.
    """
    tree = TreeNode.read(io.StringIO("((a:1, b:2)c:3)root;"))
    tip_ids = np.array(['a', 'b'])
    sample_counts = np.array([[0, 1],
                              [1, 5],
                              [10, 1]])

    # Expected matrix before transposition: the first two rows are the
    # columns of ``sample_counts``; the last two rows are their
    # element-wise sum (presumably the internal node ``c`` and the root).
    expected = np.array([[0, 1, 10],
                         [1, 5, 1],
                         [1, 6, 11],
                         [1, 6, 11]])

    # The tree index and branch lengths are unpacked (so the call must
    # return three values) but are not asserted on in this test.
    count_array, _indexed, _branch_lengths = _vectorize_counts_and_tree(
        sample_counts, tip_ids, tree)

    npt.assert_equal(count_array, expected.T)
def test_vectorize_counts_and_tree(self):
    """Verify the first return value of ``_vectorize_counts_and_tree``.

    Uses the tree ``((a:1, b:2)c:3)root;`` with three count vectors over
    the tips ``a`` and ``b``.
    """
    newick = io.StringIO("((a:1, b:2)c:3)root;")
    tree = TreeNode.read(newick)
    observed_counts = np.array([[0, 1], [1, 5], [10, 1]])

    result = _vectorize_counts_and_tree(
        observed_counts, np.array(['a', 'b']), tree)
    # Three values are expected back; only the count matrix is checked.
    count_array, _tree_index, _lengths = result

    # Rows one and two mirror the input columns; rows three and four are
    # their element-wise sums. The matrix is transposed for comparison.
    untransposed = np.array([
        [0, 1, 10],
        [1, 5, 1],
        [1, 6, 11],
        [1, 6, 11],
    ])
    npt.assert_equal(count_array, untransposed.T)
def _setup_multiple_unifrac(counts, otu_ids, tree, validate):
    """Optionally validate the inputs, then vectorize counts over the tree.

    Parameters
    ----------
    counts
        Count data forwarded unchanged to ``_vectorize_counts_and_tree``.
        Only ``counts[0]`` is handed to the validator.
    otu_ids
        Identifiers forwarded to the validator and the vectorizer.
    tree
        Tree forwarded to the validator and the vectorizer.
    validate : bool
        When truthy, ``_validate_otu_ids_and_tree`` is called first.

    Returns
    -------
    tuple
        ``(counts_by_node, tree_index, branch_lengths)`` exactly as
        produced by ``_vectorize_counts_and_tree``.
    """
    if validate:
        # Only the first entry of ``counts`` is checked against the ids
        # and tree here.
        first_vector = counts[0]
        _validate_otu_ids_and_tree(first_vector, otu_ids, tree)

    vectorized = _vectorize_counts_and_tree(counts, otu_ids, tree)
    node_counts, index, lengths = vectorized
    return node_counts, index, lengths
def _setup_multiple_unifrac(counts, otu_ids, tree, validate):
    """Prepare the per-node inputs shared by multi-sample UniFrac runs.

    If ``validate`` is truthy, ``counts[0]`` is checked together with
    ``otu_ids`` and ``tree`` by ``_validate_otu_ids_and_tree``. The full
    ``counts`` object is then passed to ``_vectorize_counts_and_tree``.

    Returns
    -------
    tuple
        The three values returned by ``_vectorize_counts_and_tree``:
        counts by node, the tree index, and the branch lengths.
    """
    if validate:
        _validate_otu_ids_and_tree(counts[0], otu_ids, tree)

    by_node, tree_idx, branch_lens = _vectorize_counts_and_tree(
        counts, otu_ids, tree)
    return by_node, tree_idx, branch_lens
def _setup_faith_pd(counts, otu_ids, tree, validate, single_sample):
    """Validate (optionally) and vectorize the inputs for Faith's PD.

    Parameters
    ----------
    counts
        A single count vector when ``single_sample`` is truthy; otherwise
        an object whose first element ``counts[0]`` is a count vector.
    otu_ids
        Identifiers forwarded to the validator and the vectorizer.
    tree
        Tree forwarded to the validator and the vectorizer.
    validate : bool
        When truthy, run input validation before vectorizing.
    single_sample : bool
        Selects which validation path is taken (see the comments below).

    Returns
    -------
    tuple
        ``(counts_by_node, branch_lengths)``; the tree index returned by
        ``_vectorize_counts_and_tree`` is discarded.
    """
    if validate:
        if single_sample:
            # Counts are only validated in single-sample mode; in the
            # multi-sample case they have already been validated.
            counts = _validate_counts_vector(counts)
            vector_to_check = counts
        else:
            vector_to_check = counts[0]
        _validate_otu_ids_and_tree(vector_to_check, otu_ids, tree)

    node_counts, _, lengths = _vectorize_counts_and_tree(
        counts, otu_ids, tree)
    return node_counts, lengths
def _setup_faith_pd(counts, otu_ids, tree, validate, single_sample):
    """Build the per-node counts and branch lengths used by Faith's PD.

    With ``validate`` truthy, a count vector is checked against
    ``otu_ids`` and ``tree``: in single-sample mode that vector is the
    result of ``_validate_counts_vector(counts)`` (which also replaces
    ``counts``), otherwise it is ``counts[0]``.

    Returns
    -------
    tuple
        ``(counts_by_node, branch_lengths)`` taken from
        ``_vectorize_counts_and_tree``; its tree index is not returned.
    """
    if validate and single_sample:
        # Count validation happens only in single-sample mode; otherwise
        # the counts will already have been validated.
        counts = _validate_counts_vector(counts)
        _validate_otu_ids_and_tree(counts, otu_ids, tree)
    elif validate:
        _validate_otu_ids_and_tree(counts[0], otu_ids, tree)

    by_node, _unused_index, branch_lens = _vectorize_counts_and_tree(
        counts, otu_ids, tree)
    return by_node, branch_lens
def _run_unifrac(counts, otu_ids, pruned_phylo, method):
    """Compute UniFrac features for ``counts`` with the requested method.

    Parameters
    ----------
    counts
        Count data indexed as ``counts[0, :]`` to obtain the vector used
        for building the tree index; the full object is forwarded to the
        feature function.
    otu_ids
        Identifiers forwarded to the vectorizer and the feature function.
    pruned_phylo
        Tree forwarded to the vectorizer and the feature function.
    method : str
        Either ``'weighted_unifrac'`` or ``'unweighted_unifrac'``.

    Returns
    -------
    tuple
        ``(features, tree_index)`` where ``tree_index`` comes from
        ``_vectorize_counts_and_tree``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the two supported names.
    """
    # Reject an unsupported method before doing any tree work. The message
    # is formatted into one string; passing the pieces as separate
    # ValueError arguments would render the error as a tuple.
    if method not in ('weighted_unifrac', 'unweighted_unifrac'):
        raise ValueError(
            'Method not implemented. Options are weighted_unifrac or '
            'unweighted_unifrac, but {!r} is given'.format(method))

    # Only the tree index and branch lengths are needed from this call;
    # the vectorized counts are discarded.
    _, tree_index, branch_lengths = _vectorize_counts_and_tree(
        counts[0, :], otu_ids, pruned_phylo)

    if method == 'weighted_unifrac':
        features = _weighted_unifrac_features(counts, otu_ids, pruned_phylo,
                                              tree_index, branch_lengths)
    else:
        features = _unweighted_unifrac_features(counts, branch_lengths,
                                                otu_ids, pruned_phylo)
    return features, tree_index
def _setup_pairwise_unifrac(u_counts, v_counts, otu_ids, tree, validate,
                            normalized, unweighted):
    """Prepare per-node counts and totals for one pairwise UniFrac call.

    Parameters
    ----------
    u_counts, v_counts
        The two count vectors being compared; each is converted with
        ``np.asarray``.
    otu_ids
        Identifiers forwarded to the validator and the vectorizer.
    tree
        Tree forwarded to the validator and the vectorizer.
    validate : bool
        When truthy, ``_validate`` is called on the raw inputs first.
    normalized, unweighted
        Accepted for interface compatibility; not read in this function.

    Returns
    -------
    tuple
        ``(u_node_counts, v_node_counts, u_total_count, v_total_count,
        tree_index)``.
    """
    if validate:
        _validate(u_counts, v_counts, otu_ids, tree)

    u_counts = np.asarray(u_counts)
    v_counts = np.asarray(v_counts)

    # _vectorize_counts_and_tree takes a 2-D array, so the two vectors are
    # stacked temporarily and split apart again below.
    stacked = np.vstack([u_counts, v_counts])
    node_counts, tree_index, _ = _vectorize_counts_and_tree(
        stacked, otu_ids, tree)

    return (node_counts[0], node_counts[1],
            u_counts.sum(), v_counts.sum(),
            tree_index)
def _setup_pairwise_unifrac(u_counts, v_counts, otu_ids, tree, validate,
                            normalized, unweighted):
    """Vectorize two count vectors against ``tree`` for pairwise UniFrac.

    The inputs are optionally validated with ``_validate``, converted to
    arrays, stacked into a two-row array, and passed through
    ``_vectorize_counts_and_tree``. ``normalized`` and ``unweighted`` are
    part of the signature but are not used here.

    Returns
    -------
    tuple
        ``(u_node_counts, v_node_counts, u_total_count, v_total_count,
        tree_index)`` where the totals are the sums of the original
        (non-vectorized) count arrays.
    """
    if validate:
        _validate(u_counts, v_counts, otu_ids, tree)

    u_arr = np.asarray(u_counts)
    v_arr = np.asarray(v_counts)

    # The vectorizer works on a 2-D array, so both vectors go in together.
    pair = np.vstack([u_arr, v_arr])
    by_node, tree_index, _branch_lengths = _vectorize_counts_and_tree(
        pair, otu_ids, tree)

    # Row 0 belongs to u, row 1 to v.
    u_node_counts = by_node[0]
    v_node_counts = by_node[1]
    u_total = u_arr.sum()
    v_total = v_arr.sum()
    return (u_node_counts, v_node_counts, u_total, v_total, tree_index)