def test_fitch_descendants_missing_data(self):
    """fitch_descendants should work with missing data.

    Runs both counters over the same bound array and checks the change
    counts as well as the array contents left behind by each run.
    """
    # Tree and tip/environment table used for the missing-value case.
    newick = '(((a:1,b:2):4,(c:3,d:1):2):1,(e:2,f:1):3);'
    env_table = """a A
b B
c D
d C
e C
f D"""
    tree = DndParser(newick, UniFracTreeNode)
    node_index, nodes = index_tree(tree)
    env_counts = count_envs(env_table.split('\n'))
    indexed = index_envs(env_counts, node_index)
    count_array, unique_envs, env_to_index, node_to_index = indexed
    branch_lengths = get_branch_lengths(node_index)

    # Test just the AB pair: keep only the first two environment columns.
    ab_counts = count_array[:, 0:2]
    bindings = bind_to_array(nodes, ab_counts)

    self.assertEqual(fitch_descendants(bindings, counter=FitchCounter), 1)
    orig_result = ab_counts.copy()

    # Check that the original (dense) Fitch counter gives the expected
    # incorrect parsimony result.
    self.assertEqual(
        fitch_descendants(bindings, counter=FitchCounterDense), 5)
    new_result = ab_counts.copy()

    # Both versions must fill the array with the same values.
    self.assertEqual(orig_result, new_result)
def setUp(self):
    """Build the standard trees, env tables and index arrays for the tests."""
    # Topology-only trees.
    self.t1 = DndParser('(((a,b),c),(d,e))', UniFracTreeNode)
    self.t2 = DndParser('(((a,b),(c,d)),(e,f))', UniFracTreeNode)
    self.t3 = DndParser('(((a,b,c),(d)),(e,f))', UniFracTreeNode)
    self.t4 = DndParser('((c)b,((f,g,h)e,i)d)', UniFracTreeNode)
    self.t4.Name = 'a'

    # Tree with branch lengths plus its tip/environment/count table.
    self.t_str = '((a:1,b:2):4,(c:3,(d:1,e:1):2):3)'
    self.t = DndParser(self.t_str, UniFracTreeNode)
    self.env_str = """
a A 1
a C 2
b A 1
b B 1
c B 1
d B 3
e C 1"""
    self.env_counts = count_envs(self.env_str.splitlines())
    self.node_index, self.nodes = index_tree(self.t)
    indexed = index_envs(self.env_counts, self.node_index)
    (self.count_array, self.unique_envs,
     self.env_to_index, self.node_to_index) = indexed
    self.branch_lengths = get_branch_lengths(self.node_index)

    # Second fixture set, kept under the "old_" prefix.
    self.old_t_str = '((org1:0.11,org2:0.22,(org3:0.12,org4:0.23)g:0.33)b:0.2,(org5:0.44,org6:0.55)c:0.3,org7:0.4)'
    self.old_t = DndParser(self.old_t_str, UniFracTreeNode)
    self.old_env_str = """
org1 env1 1
org1 env2 1
org2 env2 1
org3 env2 1
org4 env3 1
org5 env1 1
org6 env1 1
org7 env3 1
"""
    self.old_env_counts = count_envs(self.old_env_str.splitlines())
    self.old_node_index, self.old_nodes = index_tree(self.old_t)
    old_indexed = index_envs(self.old_env_counts, self.old_node_index)
    (self.old_count_array, self.old_unique_envs,
     self.old_env_to_index, self.old_node_to_index) = old_indexed
    self.old_branch_lengths = get_branch_lengths(self.old_node_index)
def setUp(self):
    """Create the shared trees, environment counts and index structures."""
    node_cls = UniFracTreeNode

    # Trees without branch lengths.
    self.t1 = DndParser("(((a,b),c),(d,e))", node_cls)
    self.t2 = DndParser("(((a,b),(c,d)),(e,f))", node_cls)
    self.t3 = DndParser("(((a,b,c),(d)),(e,f))", node_cls)
    self.t4 = DndParser("((c)b,((f,g,h)e,i)d)", node_cls)
    self.t4.Name = "a"

    # Weighted tree and the environment table that goes with it.
    self.t_str = "((a:1,b:2):4,(c:3,(d:1,e:1):2):3)"
    self.t = DndParser(self.t_str, node_cls)
    self.env_str = """
a A 1
a C 2
b A 1
b B 1
c B 1
d B 3
e C 1"""
    self.env_counts = count_envs(self.env_str.splitlines())
    self.node_index, self.nodes = index_tree(self.t)
    (
        self.count_array,
        self.unique_envs,
        self.env_to_index,
        self.node_to_index,
    ) = index_envs(self.env_counts, self.node_index)
    self.branch_lengths = get_branch_lengths(self.node_index)

    # "old_" fixtures: a second tree with named internal nodes.
    self.old_t_str = "((org1:0.11,org2:0.22,(org3:0.12,org4:0.23)g:0.33)b:0.2,(org5:0.44,org6:0.55)c:0.3,org7:0.4)"
    self.old_t = DndParser(self.old_t_str, node_cls)
    self.old_env_str = """
org1 env1 1
org1 env2 1
org2 env2 1
org3 env2 1
org4 env3 1
org5 env1 1
org6 env1 1
org7 env3 1
"""
    self.old_env_counts = count_envs(self.old_env_str.splitlines())
    self.old_node_index, self.old_nodes = index_tree(self.old_t)
    (
        self.old_count_array,
        self.old_unique_envs,
        self.old_env_to_index,
        self.old_node_to_index,
    ) = index_envs(self.old_env_counts, self.old_node_index)
    self.old_branch_lengths = get_branch_lengths(self.old_node_index)
def test_PD_generic_whole_tree(self):
    """PD_generic_whole_tree should correctly compute PD for test tree.

    The tree and environment counts are built locally so the test does
    not depend on, or overwrite, attributes created in setUp.
    """
    tree = DndParser('((a:1,b:2):4,(c:3,(d:1,e:1):2):3)', UniFracTreeNode)
    env_str = """
a A 1
a C 2
b A 1
b B 1
c B 1
d B 3
e C 1"""
    env_counts = count_envs(env_str.splitlines())
    # Pass the counts parsed above; the original computed them and then
    # ignored them in favour of self.env_counts.
    self.assertEqual(
        PD_generic_whole_tree(tree, env_counts),
        (['A', 'B', 'C'], array([7., 15., 11.])))
def test_count_envs(self):
    """count_envs should return correct counts from lines"""
    # Input mixes complete records, records without a count, a record
    # with trailing text, single-field lines and a repeated tip/env pair.
    raw = """
a A 3 some other junk
a B
a C 1
b A 2
skip
c B
d
b A 99
"""
    expected = {
        'a': {'A': 3, 'B': 1, 'C': 1},
        'b': {'A': 99},
        'c': {'B': 1},
    }
    observed = count_envs(raw.splitlines())
    self.assertEqual(observed, expected)
def test_shared_branch_length_to_root(self):
    """Should return the correct shared branch length by env to root"""
    newick = '(((a:1,b:2):3,c:4),(d:5,e:6,f:7):8);'
    env_table = """
a A 1
b A 1
c A 1
d A 1
e A 1
f B 1
"""
    counts = count_envs(env_table.splitlines())
    tree = DndParser(newick, UniFracTreeNode)
    expected = dict(A=29.0, B=15.0)
    observed = shared_branch_length_to_root(tree, counts)
    self.assertEqual(observed, expected)
def test_count_envs(self):
    """count_envs should return correct counts from lines"""
    # One record per line; some lines carry extra text, lack a count,
    # or hold a single field, and the b/A pair appears twice.
    env_lines = """
a A 3 some other junk
a B
a C 1
b A 2
skip
c B
d
b A 99
""".splitlines()
    want = {"a": {"A": 3, "B": 1, "C": 1}, "b": {"A": 99}, "c": {"B": 1}}
    self.assertEqual(count_envs(env_lines), want)
def test_sum_env_dict(self):
    """sum_env_dict should return correct counts from env_dict"""
    env_table = """
a A 3 some other junk
a B
a C 1
b A 2
skip
c B
d
b A 99
"""
    # Parse the table first, then total the resulting nested dict.
    env_dict = count_envs(env_table.splitlines())
    total = sum_env_dict(env_dict)
    self.assertEqual(total, 105)
def test_shared_branch_length_to_root(self):
    """Should return the correct shared branch length by env to root"""
    tree = DndParser('(((a:1,b:2):3,c:4),(d:5,e:6,f:7):8);', UniFracTreeNode)
    env_lines = """
a A 1
b A 1
c A 1
d A 1
e A 1
f B 1
""".splitlines()
    counts = count_envs(env_lines)
    # Expected length per environment.
    want = {"A": 29.0, "B": 15.0}
    self.assertEqual(shared_branch_length_to_root(tree, counts), want)
def test_PD_whole_tree(self):
    """PD_whole_tree should correctly compute PD for test tree.

    environment M contains only tips not in tree, tip j is in no envs
    """
    newick = '((a:1,b:2):4,((c:3, j:17),(d:1,e:1):2):3)'
    tree = DndParser(newick, UniFracTreeNode)
    env_table = """
a A 1
a C 2
b A 1
b B 1
c B 1
d B 3
e C 1
m M 88"""
    counts = count_envs(env_table.splitlines())
    observed = PD_whole_tree(tree, counts)
    expected = (['A', 'B', 'C'], array([7., 15., 11.]))
    self.assertEqual(observed, expected)
def test_unifrac_explicit(self):
    """unifrac should correctly compute correct values.

    environment M contains only tips not in tree, tip j is in no envs
    values were calculated by hand
    """
    # note c,j is len 0 node
    #           /-------- /-a
    # ---------|          \-b
    #          |          /-------- /-c
    #           \--------|          \-j
    #                     \-------- /-d
    #                               \-e
    first_tree = DndParser(
        '((a:1,b:2):4,((c:3, j:17),(d:1,e:1):2):3)', UniFracTreeNode)
    env_table = """
a A 1
a C 2
b A 1
b B 1
c B 1
d B 3
e C 1
m M 88"""
    counts = count_envs(env_table.splitlines())

    # Hand-calculated distance matrix with its environment labels.
    # NOTE(review): 10/16 etc. need true division; confirm the module
    # enables it if this still runs under Python 2.
    expected = (
        array([[0, 10/16, 8/13],
               [10/16, 0, 8/17],
               [8/13, 8/17, 0]]),
        ['A', 'B', 'C'],
    )
    observed = fast_unifrac(first_tree, counts)['distance_matrix']
    self.assertFloatEqual(observed, expected)

    # changing tree topology relative to c,j tips shouldn't change
    # anything
    second_tree = DndParser(
        '((a:1,b:2):4,((c:2, j:16):1,(d:1,e:1):2):3)', UniFracTreeNode)
    observed = fast_unifrac(second_tree, counts)['distance_matrix']
    self.assertFloatEqual(observed, expected)
def test_shared_branch_length(self):
    """Should return the correct shared branch length by env"""
    newick = '(((a:1,b:2):3,c:4),(d:5,e:6,f:7):8);'
    env_table = """
a A 1
b A 1
c A 1
d A 1
e A 1
f B 1
"""
    counts = count_envs(env_table.splitlines())
    tree = DndParser(newick, UniFracTreeNode)

    # Third argument 1: results keyed by single-environment tuples.
    singles = shared_branch_length(tree, counts, 1)
    self.assertEqual(singles, {('A',): 21.0, ('B',): 7.0})

    # Third argument 2: result keyed by the environment pair.
    pairs = shared_branch_length(tree, counts, 2)
    self.assertEqual(pairs, {('A', 'B'): 8.0})

    # Third argument 3 is expected to raise ValueError for this data.
    self.assertRaises(ValueError, shared_branch_length, tree, counts, 3)
def test_shared_branch_length(self):
    """Should return the correct shared branch length by env"""
    tree = DndParser('(((a:1,b:2):3,c:4),(d:5,e:6,f:7):8);', UniFracTreeNode)
    env_lines = """
a A 1
b A 1
c A 1
d A 1
e A 1
f B 1
""".splitlines()
    counts = count_envs(env_lines)

    # Check the result for each supported value of the third argument.
    expected_by_size = (
        (1, {("A",): 21.0, ("B",): 7.0}),
        (2, {("A", "B"): 8.0}),
    )
    for size, want in expected_by_size:
        got = shared_branch_length(tree, counts, size)
        self.assertEqual(got, want)

    # A value of 3 must be rejected with ValueError.
    self.assertRaises(ValueError, shared_branch_length, tree, counts, 3)
def setUp(self):
    """Define some standard trees."""
    node_cls = UniFracTreeNode

    # Primary tree and its full environment table.
    self.t_str = "((a:1,b:2):4,(c:3,(d:1,e:1):2):3)"
    self.t = DndParser(self.t_str, node_cls)
    self.env_str = """
a A 1
a C 2
b A 1
b B 1
c B 1
d B 3
e C 1"""
    self.env_counts = count_envs(self.env_str.splitlines())

    # Table listing only some of the tree's tips.
    self.missing_env_str = """
a A 1
a C 2
e C 1"""
    self.missing_env_counts = count_envs(self.missing_env_str.splitlines())

    # Table whose tip names are mostly absent from the tree ("e" matches).
    self.extra_tip_str = """
q A 1
w C 2
e A 1
r B 1
t B 1
y B 3
u C 1"""
    self.extra_tip_counts = count_envs(self.extra_tip_str.splitlines())

    # Table in which no tip name matches the tree.
    self.wrong_tip_str = """
q A 1
w C 2
r B 1
t B 1
y B 3
u C 1"""
    self.wrong_tip_counts = count_envs(self.wrong_tip_str.splitlines())

    # Second tree with its own environment table.
    self.t2_str = "(((a:1,b:1):1,c:5):2,d:4)"
    self.t2 = DndParser(self.t2_str, node_cls)
    self.env2_str = """
a B 1
b A 1
c A 2
c C 2
d B 1
d C 1"""
    self.env2_counts = count_envs(self.env2_str.splitlines())

    self.trees = [self.t, self.t2]
    self.envs = [self.env_counts, self.env2_counts]
    self.mc_1 = array([.5, .4, .3, .2, .1, .6, .7, .8, .9, 1.0])

    # from old EnvsNode tests
    self.old_t_str = "((org1:0.11,org2:0.22,(org3:0.12,org4:0.23)g:0.33)b:0.2,(org5:0.44,org6:0.55)c:0.3,org7:0.4)"
    self.old_t = DndParser(self.old_t_str, node_cls)
    self.old_env_str = """
org1 env1 1
org1 env2 1
org2 env2 1
org3 env2 1
org4 env3 1
org5 env1 1
org6 env1 1
org7 env3 1
"""
    self.old_env_counts = count_envs(self.old_env_str.splitlines())
    self.old_node_index, self.old_nodes = index_tree(self.old_t)
    (
        self.old_count_array,
        self.old_unique_envs,
        self.old_env_to_index,
        self.old_node_to_index,
    ) = index_envs(self.old_env_counts, self.old_node_index)
    self.old_branch_lengths = get_branch_lengths(self.old_node_index)
def test_unifrac_make_subtree(self):
    """unifrac result should not depend on make_subtree

    environment M contains only tips not in tree, tip j, k is in no envs
    one clade is missing entirely
    values were calculated by hand
    we also test that we still have a valid tree at the end
    """
    base_newick = '((a:1,b:2):4,((c:3, (j:1,k:2)mt:17),(d:1,e:1):2):3)'
    t1 = DndParser(base_newick, UniFracTreeNode)
    # note c,j is len 0 node
    #           /-------- /-a
    # ---------|          \-b
    #          |          /-------- /-c
    #           \--------|          \mt------ /-j
    #                    |                    \-k
    #                     \-------- /-d
    #                               \-e
    #
    env_table = """
a A 1
a C 2
b A 1
b B 1
c B 1
d B 3
e C 1
m M 88"""
    counts = count_envs(env_table.splitlines())

    # Hand-calculated distance matrix with its environment labels.
    expected = (
        array([[0, 10/16, 8/13],
               [10/16, 0, 8/17],
               [8/13, 8/17, 0]]),
        ['A', 'B', 'C'],
    )

    # changing tree topology relative to c,j tips shouldn't change anything
    t2 = DndParser(
        '((a:1,b:2):4,((c:2, (j:1,k:2)mt:17):1,(d:1,e:1):2):3)',
        UniFracTreeNode)

    # Same order as before: t1 without/with subtree, then t2 likewise.
    for tree in (t1, t2):
        for subtree_flag in (False, True):
            result = fast_unifrac(tree, counts, make_subtree=subtree_flag)
            self.assertFloatEqual(result['distance_matrix'], expected)

    # ensure we haven't meaningfully changed the tree
    # by passing it to unifrac
    t3 = DndParser(base_newick, UniFracTreeNode)
    # note c,j is len 0 node
    t1_tips = sorted(tip.Name for tip in t1.tips())
    t3_tips = sorted(tip.Name for tip in t3.tips())
    self.assertEqual(t1_tips, t3_tips)

    # The j-to-b distance must match between the used and the fresh tree.
    tipj3 = t3.getNodeMatchingName('j')
    tipb3 = t3.getNodeMatchingName('b')
    tipj1 = t1.getNodeMatchingName('j')
    tipb1 = t1.getNodeMatchingName('b')
    self.assertFloatEqual(tipj1.distance(tipb1), tipj3.distance(tipb3))