def test_make_unifrac_metric2(self):
    """ samples with no seqs, and identical samples, should behave correctly

    Checks the matrix returned by the metric built with
    make_unifrac_metric(False, unifrac, True) on a table where rows 0 and 13
    are all zeros and rows 12 and 15 are identical.
    """
    tree = parse_newick(self.l19_treestr, PhyloNode)
    unif = make_unifrac_metric(False, unifrac, True)
    otu_data = numpy.array([
        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam1 zeros
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam14 zeros
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],  # sam 16 now like sam 13
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0],
    ])
    # catch_warnings restores the previous filter state on exit, even when
    # the call raises; resetwarnings() would instead discard every filter,
    # including ones installed by the test runner.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        res = unif(otu_data, self.l19_taxon_names, tree,
                   self.l19_sample_names)
    self.assertEqual(res[0, 0], 0)
    self.assertEqual(res[0, 13], 0.0)   # two all-zero samples
    self.assertEqual(res[12, 15], 0.0)  # two identical samples
    self.assertEqual(res[0, 1], 1.0)    # all-zero sample vs. sample with counts
def result(data, taxon_names, tree, sample_names, one_sample_name, **kwargs):
    """ wraps fast_unifrac_one_sample, returning its result reordered to
    follow sample_names

    sample_names: list of unique strings
    one_sample_name: the sample whose distances are requested
    kwargs: forwarded unchanged to fast_unifrac_one_sample

    `weighted` and `metric` are not parameters; they are looked up in the
    surrounding scope.

    When fast_unifrac_one_sample raises a ValueError whose text contains
    'one_sample_name not found', a warning is issued and a self-only result
    (a single 0.0 distance for one_sample_name) is used instead. Any other
    ValueError is re-raised.
    """
    envs = make_envs_dict(data, sample_names, taxon_names)
    try:
        one_sample_res = fast_unifrac_one_sample(
            one_sample_name, tree, envs,
            weighted=weighted, metric=metric, **kwargs)
    except ValueError as e:
        if 'one_sample_name not found' not in str(e):
            raise e
        message = ('unifrac had no information on sample ' +
                   one_sample_name +
                   ". Distances involving that sample aren't meaningful")
        warnings.warn(message)
        # self only
        one_sample_res = (numpy.array([0.0]), [one_sample_name])
    return _reorder_unifrac_res_one_sample(one_sample_res, sample_names)
def test_make_envs_dict(self):
    """ make_envs_dict should have the same abundance for each taxon
    as the matrix that made the dict"""
    taxa = self.l19_taxon_names
    envs = make_envs_dict(self.l19_data, self.l19_sample_names, taxa)
    for taxon, per_sample_counts in envs.items():
        # the taxon's position in the name list is its column in the matrix
        column = taxa.index(taxon)
        self.assertEqual(sum(per_sample_counts.values()),
                         self.l19_data[:, column].sum())
def test_make_envs_dict(self):
    """ the per-taxon totals stored in the envs dict should equal the
    column totals of the matrix the dict was built from"""
    envs = make_envs_dict(self.l19_data, self.l19_sample_names,
                          self.l19_taxon_names)
    for taxon in envs:
        envs_total = sum(envs[taxon].values())
        # column of this taxon in the source matrix
        matrix_col = self.l19_taxon_names.index(taxon)
        matrix_total = self.l19_data[:, matrix_col].sum()
        self.assertEqual(envs_total, matrix_total)
def test_make_unifrac_metric(self):
    """ exercise of the unweighted unifrac metric should not throw errors

    Also checks that the wrapped metric agrees with calling fast_unifrac
    directly and reordering its distance matrix.
    """
    sample_names = self.l19_sample_names
    tree = parse_newick(self.l19_treestr, PhyloNode)
    unif = make_unifrac_metric(False, unifrac, True)
    res = unif(self.l19_data, self.l19_taxon_names, tree, sample_names)

    # reference result: call fast_unifrac directly, then reorder by hand
    envs = make_envs_dict(self.l19_data, sample_names,
                          self.l19_taxon_names)
    direct = fast_unifrac(tree, envs, modes=["distance_matrix"])
    unifrac_mat, unifrac_names = direct["distance_matrix"]
    expected = _reorder_unifrac_res([unifrac_mat, unifrac_names],
                                    sample_names)

    self.assertFloatEqual(res, expected)
    self.assertEqual(res[0, 0], 0)
    self.assertEqual(res[0, 3], 0.0)
    self.assertNotEqual(res[0, 1], 1.0)
def result(data, taxon_names, tree, sample_names, one_sample_name, **kwargs):
    """ wraps fast_unifrac_one_sample, returning its result reordered to
    follow sample_names

    sample_names: list of unique strings
    one_sample_name: the sample whose distances are requested
    kwargs: forwarded unchanged to fast_unifrac_one_sample

    `weighted` and `metric` are not parameters; they are looked up in the
    surrounding scope.
    """
    envs = make_envs_dict(data, sample_names, taxon_names)
    one_sample_res = fast_unifrac_one_sample(
        one_sample_name, tree, envs,
        weighted=weighted, metric=metric, **kwargs)
    return _reorder_unifrac_res_one_sample(one_sample_res, sample_names)
def result(data, taxon_names, tree, sample_names, **kwargs):
    """ wraps the fast_unifrac fn to return just a matrix, in correct order

    sample_names: list of unique strings
    kwargs: forwarded unchanged to fast_unifrac

    `weighted`, `metric` and `is_symmetric` are not parameters; they are
    looked up in the surrounding scope. Only the "distance_matrix" mode is
    requested, and that entry is reordered to follow sample_names.
    """
    envs = make_envs_dict(data, sample_names, taxon_names)
    all_modes = fast_unifrac(tree, envs,
                             weighted=weighted,
                             metric=metric,
                             is_symmetric=is_symmetric,
                             modes=["distance_matrix"],
                             **kwargs)
    distance_entry = all_modes["distance_matrix"]
    return _reorder_unifrac_res(distance_entry, sample_names)
def test_make_unifrac_row_metric3(self):
    """ row metric should match the full matrix when some samples have no
    taxa left in the tree

    The tree omits taxa 1, 2 and 3, so samples whose counts fall only in the
    first three columns (rows 0, 3, 4, 5, 8, 9) carry no usable data.
    """
    treestr = ('((((tax7:0.1):.98,tax8:.3, tax4:.3):.4, '
               '((tax6:.09):0.43):0.5):.2,'
               '(tax9:0.3, endbigtaxon:.08));')  # taxa 1,2,3 removed
    tree = parse_newick(treestr, PhyloNode)

    otu_data = numpy.array([
        [7, 1, 0, 0, 0, 0, 0, 0, 0],  # 1 now zeros
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],  # 4 now zeros
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 1, 7, 0, 0, 0, 0, 0],
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 0, 7, 1, 0, 0, 0, 0],
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0],
    ])
    # rows whose counts are all in the removed taxa
    no_data = (0, 3, 4, 5, 8, 9)

    unif = make_unifrac_metric(False, unifrac, True)
    # catch_warnings restores the previous filter state on exit, even when
    # the call raises; resetwarnings() would instead discard every filter.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        res = unif(otu_data, self.l19_taxon_names, tree,
                   self.l19_sample_names)

    self.assertEqual(res[0, 0], 0)
    self.assertEqual(res[0, 3], 0.0)
    self.assertEqual(res[0, 1], 1.0)

    unif_row = make_unifrac_row_metric(False, unifrac, True)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        for i, sam_name in enumerate(self.l19_sample_names):
            if i in no_data:
                continue
            # these have no data and are warned "meaningless".
            # I Would prefer if they matched res anyway though
            res_row = unif_row(otu_data, self.l19_taxon_names, tree,
                               self.l19_sample_names, sam_name)
            for j in range(len(self.l19_sample_names)):
                if j in no_data:
                    continue  # ok if meaningless number in zero sample
                # both values are computed floats from different code
                # paths, so compare with a tolerance rather than exactly
                self.assertAlmostEqual(res_row[j], res[i, j])
def result(data, taxon_names, tree, sample_names, **kwargs):
    """ calls fast_unifrac and returns only its distance matrix, reordered
    to follow sample_names

    sample_names: list of unique strings
    kwargs: forwarded unchanged to fast_unifrac

    `weighted`, `metric` and `is_symmetric` come from the surrounding scope,
    not from the arguments.
    """
    unifrac_res = fast_unifrac(
        tree,
        make_envs_dict(data, sample_names, taxon_names),
        weighted=weighted,
        metric=metric,
        is_symmetric=is_symmetric,
        modes=["distance_matrix"],
        **kwargs)
    return _reorder_unifrac_res(unifrac_res['distance_matrix'], sample_names)
def test_make_unifrac_row_metric3(self):
    """ row metric should match the full matrix when some samples have no
    taxa left in the tree

    The tree omits taxa 1, 2 and 3, so samples whose counts fall only in the
    first three columns (rows 0, 3, 4, 5, 8, 9) carry no usable data.
    """
    treestr = ('((((tax7:0.1):.98,tax8:.3, tax4:.3):.4, '
               '((tax6:.09):0.43):0.5):.2,'
               '(tax9:0.3, endbigtaxon:.08));')  # taxa 1,2,3 removed
    tree = parse_newick(treestr, PhyloNode)

    otu_data = numpy.array([
        [7, 1, 0, 0, 0, 0, 0, 0, 0],  # 1 now zeros
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],  # 4 now zeros
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 1, 7, 0, 0, 0, 0, 0],
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 0, 7, 1, 0, 0, 0, 0],
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0],
    ])
    # rows whose counts are all in the removed taxa
    no_data = (0, 3, 4, 5, 8, 9)

    unif = make_unifrac_metric(False, unifrac, True)
    # catch_warnings restores the previous filter state on exit, even when
    # the call raises; resetwarnings() would instead discard every filter.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        res = unif(otu_data, self.l19_taxon_names, tree,
                   self.l19_sample_names)

    self.assertEqual(res[0, 0], 0)
    self.assertEqual(res[0, 3], 0.0)
    self.assertEqual(res[0, 1], 1.0)

    unif_row = make_unifrac_row_metric(False, unifrac, True)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        for i, sam_name in enumerate(self.l19_sample_names):
            if i in no_data:
                continue
            # these have no data and are warned "meaningless".
            # I Would prefer if they matched res anyway though
            res_row = unif_row(otu_data, self.l19_taxon_names, tree,
                               self.l19_sample_names, sam_name)
            for j in range(len(self.l19_sample_names)):
                if j in no_data:
                    continue  # ok if meaningless number in zero sample
                self.assertAlmostEqual(res_row[j], res[i, j])
def result(data, taxon_names, tree, sample_names, one_sample_name):
    """ wraps fast_unifrac_one_sample, returning its result reordered to
    follow sample_names

    sample_names: list of unique strings
    one_sample_name: the sample whose distances are requested

    `weighted` and `metric` are not parameters; they are looked up in the
    surrounding scope.
    """
    envs = make_envs_dict(data, sample_names, taxon_names)
    one_sample_res = fast_unifrac_one_sample(
        one_sample_name, tree, envs, weighted=weighted, metric=metric)
    return _reorder_unifrac_res_one_sample(one_sample_res, sample_names)
def test_make_unifrac_row_metric2(self):
    """ samples with no seqs, and identical samples, should behave correctly

    Checks the full matrix first, then checks that each row produced by the
    row metric agrees with the matching row of the full matrix (sample 0,
    which is all zeros, is skipped on both axes).
    """
    tree = parse_newick(self.l19_treestr, PhyloNode)
    unif = make_unifrac_metric(False, unifrac, True)
    otu_data = numpy.array([
        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam1 zeros
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam14 zeros
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],  # sam 16 now like sam 13
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0],
    ])
    skipped = (0,)

    # catch_warnings restores the previous filter state on exit, even when
    # the call raises; resetwarnings() would instead discard every filter.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        res = unif(otu_data, self.l19_taxon_names, tree,
                   self.l19_sample_names)
    self.assertEqual(res[0, 0], 0)
    self.assertEqual(res[0, 13], 0.0)
    self.assertEqual(res[12, 15], 0.0)
    self.assertEqual(res[0, 1], 1.0)

    unif_row = make_unifrac_row_metric(False, unifrac, True)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        for i, sam_name in enumerate(self.l19_sample_names):
            if i in skipped:
                continue
            # these have no data and are warned "meaningless".
            # I Would prefer if they matched res anyway though
            res_row = unif_row(otu_data, self.l19_taxon_names, tree,
                               self.l19_sample_names, sam_name)
            for j in range(len(self.l19_sample_names)):
                if j in skipped:
                    continue  # ok if meaningless number in zero sample
                # both values are computed floats from different code
                # paths, so compare with a tolerance rather than exactly
                self.assertAlmostEqual(res_row[j], res[i, j])
def getResult(self, data_path, taxon_names=None, sample_names=None,
              tree_path=None):
    """Returns per-sample diversity from incidence matrix and optional tree.

    Parameters:

    data_path: can be either a file path or an array,
    if array: either numpy array or list of numpy arrays where each row is a
    sample, contents are counts of each taxon, must be dense to allow
    phylogenetic calcs (where the taxon you have matters).
    must be 2d. for one sample just do [sample_array]
    (handed to self.getData)

    taxon_names: list of names of taxa, same order as in row (required for
    phylogenetic methods)

    sample_names: list of sample names; used by the phylogenetic branch to
    build the envs dict and to order the output

    tree_path: cogent.tree.PhyloNode object, or file path (handed to
    self.getTree; only used when self.IsPhylogenetic is true)

    output:
    1d/2d array containing diversity of each sample, preserving order from
    input data
    sample by (metric name or metric.return_name)
    1d: [(metric on sample1), (metric on sample2),...
    2d: [(return val 1 from sample1),(return val 2)...]
        [(return val 1 on sample2),...]

    In the phylogenetic branch, samples that the metric did not report keep
    a value of zero.
    """
    data = self.getData(data_path)

    if not self.IsPhylogenetic:
        # Build a real list: on Python 3 map() is lazy, and numpy.array() of
        # a map object gives a 0-d object array instead of per-sample values.
        return numpy.array([self.Metric(row, **self.Params) for row in data])

    tree = self.getTree(tree_path)
    envs = make_envs_dict(data, sample_names, taxon_names)
    new_sample_names, result = self.Metric(tree, envs, **self.Params)
    ordered_res = numpy.zeros(len(sample_names), 'float')
    for i, sample in enumerate(sample_names):
        try:
            # idx is sample's index in result from metric
            idx = new_sample_names.index(sample)
            ordered_res[i] = result[idx]
        except ValueError:
            pass  # already is zero
    return numpy.array(ordered_res)
def test_make_unifrac_metric(self):
    """ exercise of the unweighted unifrac metric should not throw errors

    The wrapped metric's matrix is also compared against the distance
    matrix obtained from fast_unifrac directly, after reordering.
    """
    tree = parse_newick(self.l19_treestr, PhyloNode)
    metric_fn = make_unifrac_metric(False, unifrac, True)
    res = metric_fn(self.l19_data, self.l19_taxon_names, tree,
                    self.l19_sample_names)

    envs = make_envs_dict(self.l19_data, self.l19_sample_names,
                          self.l19_taxon_names)
    raw = fast_unifrac(tree, envs, modes=['distance_matrix'])
    unifrac_mat, unifrac_names = raw['distance_matrix']
    reordered = _reorder_unifrac_res([unifrac_mat, unifrac_names],
                                     self.l19_sample_names)
    self.assertFloatEqual(res, reordered)

    self.assertEqual(res[0, 0], 0)
    self.assertEqual(res[0, 3], 0.0)
    self.assertNotEqual(res[0, 1], 1.0)
def result(data, taxon_names, tree, sample_names, one_sample_name, **kwargs):
    """ calls fast_unifrac_one_sample for one_sample_name and returns the
    result reordered to follow sample_names

    sample_names: list of unique strings
    kwargs: forwarded unchanged to fast_unifrac_one_sample

    `weighted` and `metric` come from the surrounding scope, not from the
    arguments.

    A ValueError containing 'one_sample_name not found' is turned into a
    warning plus a self-only result (one 0.0 distance for one_sample_name).
    Every other ValueError is re-raised.
    """
    envs = make_envs_dict(data, sample_names, taxon_names)
    try:
        unifrac_res = fast_unifrac_one_sample(one_sample_name, tree, envs,
                                              weighted=weighted,
                                              metric=metric, **kwargs)
    except ValueError as e:
        sample_missing = 'one_sample_name not found' in str(e)
        if not sample_missing:
            raise e
        warnings.warn('unifrac had no information on sample ' +
                      one_sample_name +
                      ". Distances involving that sample aren't meaningful")
        self_only_distances = numpy.array([0.0])
        unifrac_res = (self_only_distances, [one_sample_name])

    return _reorder_unifrac_res_one_sample(unifrac_res, sample_names)
def test_make_unifrac_metric3(self):
    """ full-matrix metric should cope with samples whose taxa are all
    missing from the tree

    The tree omits taxa 1, 2 and 3; rows 0 and 3 of the table only have
    counts in those columns.
    """
    treestr = ('((((tax7:0.1):.98,tax8:.3, tax4:.3):.4, '
               '((tax6:.09):0.43):0.5):.2,'
               '(tax9:0.3, endbigtaxon:.08));')  # taxa 1,2,3 removed
    tree = parse_newick(treestr, PhyloNode)

    otu_data = numpy.array([
        [7, 1, 0, 0, 0, 0, 0, 0, 0],  # 1 now zeros
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],  # 4 now zeros
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 1, 7, 0, 0, 0, 0, 0],
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 0, 7, 1, 0, 0, 0, 0],
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0],
    ])

    unif = make_unifrac_metric(False, unifrac, True)
    # catch_warnings restores the previous filter state on exit, even when
    # the call raises; resetwarnings() would instead discard every filter.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        res = unif(otu_data, self.l19_taxon_names, tree,
                   self.l19_sample_names)

    self.assertEqual(res[0, 0], 0)
    self.assertEqual(res[0, 3], 0.0)
    self.assertEqual(res[0, 1], 1.0)
def test_make_unifrac_metric2(self):
    """ samples with no seqs, and identical samples, should behave correctly

    Rows 0 and 13 of the table are all zeros; rows 12 and 15 are identical.
    """
    tree = parse_newick(self.l19_treestr, PhyloNode)
    unif = make_unifrac_metric(False, unifrac, True)
    otu_data = numpy.array([
        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam1 zeros
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # sam14 zeros
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],  # sam 16 now like sam 13
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0],
    ])
    # Scope the 'ignore' filter with catch_warnings so the previous filter
    # state comes back on exit, even if the call raises. resetwarnings()
    # would instead drop every filter, including the test runner's.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        res = unif(otu_data, self.l19_taxon_names, tree,
                   self.l19_sample_names)
    self.assertEqual(res[0, 0], 0)
    self.assertEqual(res[0, 13], 0.0)   # two all-zero samples
    self.assertEqual(res[12, 15], 0.0)  # two identical samples
    self.assertEqual(res[0, 1], 1.0)    # all-zero sample vs. sample with counts
def _permuted_host_trees(ref_tree, permutations):
    """Return `permutations` copies of ref_tree with tip names shuffled."""
    host_names = ref_tree.getTipNames()
    random_names = ref_tree.getTipNames()
    permuted_trees = []
    for _ in range(permutations):
        shuffle(random_names)
        permute_dict = dict(zip(host_names, random_names))
        permuted_subtree = ref_tree.copy()
        permuted_subtree.reassignNames(permute_dict)
        permuted_trees.append(permuted_subtree)
    return permuted_trees


def unifrac_recursive_test(ref_tree, tree, sample_names, taxon_names, data,
                           permutations=1000):
    """Performs UniFrac recursively over a tree.

    Specifically, for each node in the tree, performs UniFrac clustering.
    Then compares the UniFrac tree to a reference tree of the same taxa using
    the tip-to-tip distances and the subset distances. Assumption is that if
    the two trees match, the node represents a group in which evolution has
    mirrored the evolution of the reference tree.

    Significance is estimated by comparing each node's distances to
    ref_tree with its distances to tip-label-permuted copies of ref_tree.

    Parameters:

    ref_tree: reference tree that the clustering is supposed to match.
    tree: contains the tree on which UniFrac will be performed recursively.
    sample_names: sample labels handed to make_envs_dict (these should match
        the tip labels in ref_tree)
    taxon_names: taxon labels; position i corresponds to row i of data
    data: array of counts, rows indexed like taxon_names; it is transposed
        before being handed to make_envs_dict and clipped to 0/1 for
        counting tips
    permutations: number of label-permuted copies of ref_tree to compare
        against

    Returns:

    (results_dict, acc_dict), each a dict of parallel lists with one entry
    per retained node (nodes whose cluster tree has more than two tips):
        results_dict: 'p_vals' (fraction of permuted trees whose subset
            distance is >= the observed one), 's_tips' (taxa from
            taxon_names under the node), 'h_tips' (columns of data with a
            nonzero entry for those taxa), 's_nodes' (the node), 'h_nodes'
            (the UniFrac cluster tree)
        acc_dict: 'lengths' (tips in the cluster tree), 'dists', 'sets',
            'dist_below' (as p_vals, but for tip distances)

    Note from Jon: an earlier revision of this test gave results that were
    almost always either zero or the number of permutations. The permuted
    comparisons now pass exclude_absent_taxa=True to compareBySubsets, the
    same as the observed comparison; before, the two were measured
    differently, which is the likely cause.
    """
    UNIFRAC_CLUST_ENVS = "cluster_envs"

    lengths, dists, sets = [], [], []
    s_nodes, h_nodes = [], []
    dist_below, sets_below = [], []
    h_tips, s_tips = [], []

    # Permute host tips, store permuted trees in a list
    permuted_trees = _permuted_host_trees(ref_tree, permutations)

    interaction = data.clip(0, 1)

    # Parse OTU table data into Unifrac-compatible envs tuple
    envs = make_envs_dict(data.T, sample_names, taxon_names)

    # Pass host tree, new OTU tree, and envs to recursive unifrac
    for node in tree.traverse(self_before=True, self_after=False):
        try:
            result = fast_unifrac(node, envs, weighted=False,
                                  modes=set([UNIFRAC_CLUST_ENVS]))
            curr_tree = result[UNIFRAC_CLUST_ENVS]
        except ValueError:
            # hit a single node?
            continue
        except AttributeError:
            # hit a zero branch length
            continue
        if curr_tree is None:
            # hit single node?
            continue

        # want to calculate all values before appending so we can bail out
        # if any of the calculations fails: this ensures that the lists
        # remain synchronized.
        try:
            n_tips = len(curr_tree.tips())
            d = curr_tree.compareByTipDistances(ref_tree)
            s = curr_tree.compareBySubsets(ref_tree, True)

            d_b = 0.0
            s_b = 0.0
            for rand_tree in permuted_trees:
                if d >= curr_tree.compareByTipDistances(rand_tree):
                    d_b += 1
                # Pass True (exclude_absent_taxa) here as well, so the null
                # values are measured the same way as the observed `s`.
                if s >= curr_tree.compareBySubsets(rand_tree, True):
                    s_b += 1
            d_b = d_b / float(len(permuted_trees))
            s_b = s_b / float(len(permuted_trees))

            # The following section generates s_tips and h_tips variables.
            # get just OTUs in this node
            otu_subset = set(node.getTipNames())

            # find positional index (from OTU table) for each cOTU
            # represented in this node
            s_vec = [i for i, taxon in enumerate(taxon_names)
                     if taxon in otu_subset]
            s_tips_tmp = len(s_vec)

            # slice interaction matrix down to only cOTUs in this node
            i_s_slice = interaction[numpy.ix_(s_vec)]

            # count the hosts (columns) with at least one of these cOTUs
            h_tips_tmp = 0
            for j in range(i_s_slice.shape[1]):
                if i_s_slice[:, j].sum():
                    h_tips_tmp += 1

            if n_tips > 2:
                lengths.append(n_tips)
                dists.append(d)
                sets.append(s)
                s_nodes.append(node)
                h_nodes.append(curr_tree)
                dist_below.append(d_b)
                sets_below.append(s_b)
                h_tips.append(h_tips_tmp)
                s_tips.append(s_tips_tmp)
        except ValueError:
            # no common taxa
            continue

    results_dict = {'p_vals': sets_below, 's_tips': s_tips,
                    'h_tips': h_tips, 's_nodes': s_nodes,
                    'h_nodes': h_nodes}

    acc_dict = {'lengths': lengths, 'dists': dists,
                'sets': sets, 'dist_below': dist_below}

    return (results_dict, acc_dict)