def test_delete_tree_XML(self):
    """Swapping a tree for ``None`` leaves one tree fewer in the XML.

    Loads ``create_matrix.phyml``, calls ``_swap_tree_in_XML`` with
    ``None`` as the replacement for ``Hill_Davis_2011_1`` and checks that
    the tree count dropped by one while ``Davis_2011_1`` is unchanged.
    """
    XML = etree.tostring(
        etree.parse('data/input/create_matrix.phyml', parser),
        pretty_print=True)
    name = "Hill_Davis_2011_1"
    old_len = len(obtain_trees(XML))

    new_xml = _swap_tree_in_XML(XML, None, name)
    trees = obtain_trees(new_xml)

    # A tree that was not targeted must come back untouched.
    # assertEqual (rather than the deprecated assert_) reports both
    # values when the comparison fails.
    self.assertEqual(
        trees['Davis_2011_1'],
        "((A:1.00000,B:1.00000)0.00000:0.00000,(C:1.00000,D:1.00000)0.00000:0.00000)0.00000:0.00000;"
    )
    self.assertEqual(len(trees), old_len - 1)
def test_permute_trees_3(self):
    """``permute_tree`` on ``Hill_Davis_2011_2`` yields the expected newick.

    The result is compared with ``_trees_equal`` rather than as a raw
    string.
    """
    XML = etree.tostring(
        etree.parse('data/input/permute_trees.phyml', parser),
        pretty_print=True)
    trees = obtain_trees(XML)
    # contains quoted taxa too
    output = permute_tree(trees['Hill_Davis_2011_2'], treefile="newick")
    # assertTrue replaces the deprecated assert_ alias.
    self.assertTrue(
        _trees_equal(output, "(A, (B, (C, D, E_E, F, G)));"))
def test_get_all_trees(self):
    """``obtain_trees`` returns the single-source tree under its key."""
    XML = etree.tostring(
        etree.parse(single_source_input, parser), pretty_print=True)
    tree = obtain_trees(XML)
    # Tree key is source_name_tree_no, so we should have
    # Hill_2011_1
    expected_tree = '((A_1:1.00000,B_1:1.00000)0.00000:0.00000,F_1:1.00000,E_1:1.00000,(G_1:1.00000,H_1:1.00000)0.00000:0.00000)0.00000:0.00000;'
    # assertEqual shows both strings on failure, unlike assert_(a == b).
    self.assertEqual(tree['Hill_2011_1'], expected_tree)
def test_delete_tree_XML_and_remove_source(self):
    """Deleting both ``Hill_2012`` trees leaves only the ``Hill_2011`` source.

    Each tree is removed with ``_swap_tree_in_XML(..., None, name,
    delete=True)``; afterwards the tree count must have dropped by two and
    ``get_all_source_names`` must report exactly one source.
    """
    XML = etree.tostring(
        etree.parse('data/input/clean_data.phyml', parser),
        pretty_print=True)
    # Processed in reverse-sorted order, i.e. the _2 tree is deleted first.
    tree_names = sorted(["Hill_2012_1", "Hill_2012_2"], reverse=True)
    old_len = len(obtain_trees(XML))

    new_xml = XML
    for name in tree_names:
        new_xml = _swap_tree_in_XML(new_xml, None, name, delete=True)

    trees = obtain_trees(new_xml)
    self.assertEqual(len(trees), old_len - 2)

    # check only one source remains
    source_names = get_all_source_names(new_xml)
    self.assertEqual(len(source_names), 1)
    self.assertEqual(source_names[0], "Hill_2011")
def test_permute_trees_2(self):
    """Permuting ``Davis_2011_1`` gives the trees stored in the reference file.

    The newick output is written to a temporary file, read back with
    ``import_trees`` and compared tree-by-tree against
    ``data/output/permute_trees_2.nex``.
    """
    XML = etree.tostring(
        etree.parse('data/input/permute_trees.phyml', parser),
        pretty_print=True)
    trees = obtain_trees(XML)
    output = permute_tree(trees['Davis_2011_1'], treefile="newick")

    temp_file_handle, temp_file = tempfile.mkstemp(suffix=".new")
    try:
        # Wrap the descriptor mkstemp already opened instead of opening the
        # path a second time; this way the descriptor is closed, not leaked.
        with os.fdopen(temp_file_handle, "w") as f:
            f.write(output)
        output_trees = import_trees(temp_file)
    finally:
        # Clean up even when reading the file back fails.
        os.remove(temp_file)

    expected_trees = import_trees("data/output/permute_trees_2.nex")
    self.assertEqual(len(output_trees), len(expected_trees))
    for produced, expected in zip(output_trees, expected_trees):
        self.assertTrue(_trees_equal(produced, expected))
def test_insert_tree_XML(self):
    """``_swap_tree_in_XML`` stores a new tree string under the given name.

    After the swap, ``Hill_Davis_2011_1`` must hold the new tree while
    ``Hill_2011_1`` and ``Davis_2011_1`` keep their original strings.
    """
    XML = etree.tostring(
        etree.parse('data/input/create_matrix.phyml', parser),
        pretty_print=True)
    name = "Hill_Davis_2011_1"
    tree = "(a,b,c);"

    new_xml = _swap_tree_in_XML(XML, tree, name)
    trees = obtain_trees(new_xml)

    # The two trees that were not targeted must be unchanged.
    self.assertEqual(
        trees['Hill_2011_1'],
        "((A:1.00000,B:1.00000)0.00000:0.00000,(F:1.00000,E:1.00000)0.00000:0.00000)0.00000:0.00000;"
    )
    self.assertEqual(
        trees['Davis_2011_1'],
        "((A:1.00000,B:1.00000)0.00000:0.00000,(C:1.00000,D:1.00000)0.00000:0.00000)0.00000:0.00000;"
    )
    # The targeted entry now holds exactly the string that was passed in.
    self.assertEqual(trees[name], tree)
def test_clean_data(self):
    """``clean_data`` leaves two trees from a single source, ``Hill_2011``."""
    XML = etree.tostring(
        etree.parse('data/input/clean_data.phyml', parser),
        pretty_print=True)
    XML = clean_data(XML)

    trees = obtain_trees(XML)
    self.assertEqual(len(trees), 2)
    expected_trees = {
        'Hill_2011_2': '(A,B,(C,D,E));',
        'Hill_2011_1': '(A, B, C, (D, E, F));',
    }
    # An unexpected key in ``trees`` surfaces as a KeyError here.
    for t in trees:
        self.assertTrue(_trees_equal(trees[t], expected_trees[t]))

    # check only one source remains
    names = get_all_source_names(XML)
    self.assertEqual(len(names), 1)
    self.assertEqual(names[0], "Hill_2011")
def test_amalgamate_trees_tnt(self):
    """TNT output of ``amalgamate_trees`` can be read back and matches the input.

    The output string is saved to a temporary file and re-read with
    ``import_trees``: this checks the format is readable, and then that
    every tree read back equals the corresponding tree from
    ``obtain_trees``.
    """
    XML = etree.tostring(
        etree.parse('data/input/old_stk_input.phyml', parser),
        pretty_print=True)
    output_string = amalgamate_trees(XML, format="tnt")
    trees = obtain_trees(XML)

    temp_file_handle, temp_file = tempfile.mkstemp(suffix=".tre")
    try:
        # Reuse the descriptor mkstemp opened so that it gets closed.
        with os.fdopen(temp_file_handle, "w") as f:
            f.write(output_string)
        try:
            trees_read = import_trees(temp_file)
        except Exception as detail:
            # we should get no error; report what went wrong instead of a
            # bare "False is not true".
            self.fail("could not read back TNT output: " + str(detail))
    finally:
        # Remove the file on the failure path as well.
        os.remove(temp_file)

    self.assertEqual(len(trees), len(trees_read))
    # NOTE(review): trees are paired by dict iteration order; this assumes
    # amalgamate_trees writes them in that same order -- confirm.
    names = list(trees)
    for i, name in enumerate(names):
        self.assertTrue(_trees_equal(trees_read[i], trees[name]))
def main():
    """Command-line entry point: decorate a supertree with support measures.

    Parses the arguments, loads the PHYML file and the supertree, and, when
    the two contain a different number of taxa, strips from the PHYML every
    taxon that is not a leaf of the supertree. The remaining source trees
    are handed to ``SuperTreeSupport``, which is configured to write
    ``<stub>_dec_st.nex``, ``<stub>_index.nex`` and ``<stub>_index.csv``.

    Returns early (after printing an error) if a tree fails to parse
    during taxon removal.
    """
    parser = argparse.ArgumentParser(
        prog="supertree_support",
        description="Add supertree support measures to your supertree.",
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help="Verbose output: mainly progress reports.",
        default=False)
    parser.add_argument(
        '--measure',
        choices=['v', 'v+', 'v-', 'wv', 'wv+', 'wv-'],
        default='v+',
        help=
        "Choose measure to add to the supertree output. The w version weight against the length of the source tree."
    )
    parser.add_argument(
        'input_file', metavar='input_file', nargs=1, help="Your Phyml")
    parser.add_argument(
        'input_supertree', metavar='input_supertree', nargs=1,
        help="Your supertree")
    parser.add_argument(
        'output_file', metavar='output_file', nargs=1,
        help="The output stub for all the various output files")
    args = parser.parse_args()

    verbose = args.verbose
    input_file = args.input_file[0]
    input_supertree = args.input_supertree[0]
    output_stub = args.output_file[0]

    # see p4.SuperTreeSupport for indices.
    # if non standard decoration, what metric to use
    # [S, P, R, Q, WS, WP, V, V+, V-, wV, wV+, wV-]
    #  0  1  2  3  4   5   6  7   8   9   10   11
    metric_index = {
        'v': 6,
        'v+': 7,
        'v-': 8,
        'wv': 9,
        'wv+': 10,
        'wv-': 11,
    }
    # argparse's ``choices`` should already reject anything else; the guard
    # is kept so an out-of-sync table fails loudly.
    if args.measure not in metric_index:
        print("Unknown metric")
        sys.exit(-1)
    measure = metric_index[args.measure]

    # grab taxa in dataset
    if verbose:
        print("Parsing PHYML")
    XML = stk.load_phyml(input_file)
    taxa = stk.get_all_taxa(XML)

    # load supertree
    supertree_data = stk.import_tree(input_supertree)
    supertree = stk._parse_tree(supertree_data)
    terminals = supertree.getAllLeafNames(supertree.root)

    # Only the counts are compared: same-sized but different taxon sets do
    # not trigger pruning.
    if len(taxa) != len(terminals):
        # this happens if the supertree has been pruned to remove dodgy taxa
        if verbose:
            print("Warning: supertree contains different number of taxa to your input data. Pruning input data")
        taxa.sort()
        # Set membership instead of scanning the leaf list once per taxon.
        in_supertree = set(terminals)
        delete_me = [t for t in taxa if t not in in_supertree]
        # strip from phyml
        if verbose:
            print("Deleting: " + str(len(delete_me)) + " from original " +
                  str(len(taxa)))
        try:
            XML = stk.substitute_taxa(XML, delete_me, ignoreWarnings=True)
            # do we need a clean data to check for non-informative trees here?
        except TreeParseError as detail:
            msg = "***Error: failed to parse a tree in your data set.\n" + detail.msg
            print(msg)
            return

    # get all trees from phyml
    input_trees = stk.obtain_trees(XML)
    source_trees = [stk._parse_tree(input_trees[t]) for t in input_trees]

    sts = SuperTreeSupport(input_supertree, source_trees)
    sts.doSaveDecoratedTree = True
    sts.doStandardDecoration = False
    sts.decorationMetric = measure
    sts.decoratedFilename = output_stub + '_dec_st.nex'
    sts.doSaveIndexTree = True
    sts.indexFilename = output_stub + '_index.nex'
    sts.csvFilename = output_stub + '_index.csv'
    sts.doDrawTree = False
    sts.verbose = 1
    sts.superTreeSupport()