def test_name_tree(self):
    """Check the name generated for the first source tree of a single source.

    Loads ``single_source_no_names.phyml``, takes the first ``source_tree``
    element and expects ``create_tree_name`` to return ``'Hill_2011_1'``.
    """
    XML = etree.tostring(
        etree.parse('data/input/single_source_no_names.phyml', parser),
        pretty_print=True)
    xml_root = _parse_xml(XML)
    source_tree_element = xml_root.xpath(
        '/phylo_storage/sources/source/source_tree')[0]
    tree_name = create_tree_name(XML, source_tree_element)
    # assertEqual reports both values on failure; assert_ is a deprecated
    # alias that only reports "False is not true".
    self.assertEqual(tree_name, 'Hill_2011_1')
def testSingleYear(self):
    """Subset ``sub_taxa.phyml`` by year 2011 and check every year found.

    Every ``year`` element in the subset must carry an ``integer_value``
    of 2011.
    """
    XML = etree.tostring(
        etree.parse('data/input/sub_taxa.phyml', parser), pretty_print=True)
    searchTerms = {'years': [2011]}
    new_XML = create_subset(XML, searchTerms)
    # these data are all 2011
    root = _parse_xml(new_XML)
    find = etree.XPath("//year")
    yrs = find(root)
    # NOTE(review): this loop passes vacuously if the subset contains no
    # <year> elements -- consider asserting an expected count as well.
    for y in yrs:
        self.assertEqual(int(y.xpath('integer_value')[0].text), 2011)
def testRealDataCharMorphOrMol(self):
    """Subset the real data on two character types with ``andSearch=False``.

    The sorted ``name`` attributes of the ``source`` elements in the subset
    are compared against the expected list of sources.
    """
    parsed = etree.parse('data/input/old_stk_input.phyml', parser)
    XML = etree.tostring(parsed, pretty_print=True)
    searchTerms = {'character_types': ["morphological", "molecular"]}
    new_XML = create_subset(XML, searchTerms, andSearch=False)
    root = _parse_xml(new_XML)

    # Collect the source names in sorted order for a stable comparison.
    names = sorted(src.attrib['name'] for src in root.findall(".//source"))

    expected_names = [
        "Aleixo_2002",
        "Aliabadian_etal_2007",
        "Allende_etal_2001",
        "Andersson_1999a",
        "Aragon_etal_1999",
        "Baker_Strauch_1988",
        "Baker_etal_2005",
        "Baker_etal_2006",
        "Baker_etal_2007a",
        "Baker_etal_2007b",
        "Baptista_Visser_1999",
        "Barber_Peterson_2004",
        "Barhoum_Burns_2002",
        "Bertelli_etal_2006",
    ]
    self.assertListEqual(expected_names, names)
def testRealDataAllFossil(self):
    """Subset the real data with ``fossil`` set to ``"all_fossil"``.

    Expects exactly one source, ``Baker_etal_2005``, containing a single
    ``source_tree``.
    """
    XML = etree.tostring(
        etree.parse('data/input/old_stk_input.phyml', parser),
        pretty_print=True)
    searchTerms = {'fossil': "all_fossil"}
    new_XML = create_subset(XML, searchTerms)
    root = _parse_xml(new_XML)

    names = sorted(s.attrib['name'] for s in root.findall(".//source"))
    expected_names = ["Baker_etal_2005"]
    self.assertListEqual(expected_names, names)

    src_trs = root.findall(".//source_tree")
    # assertEqual shows the actual count on failure.
    self.assertEqual(len(src_trs), 1)
def testRealDataCharTaxonYearEmpty(self):
    """Subset the real data on a taxon and a year and expect nothing back.

    The subset must contain no ``source`` and no ``source_tree`` elements.
    """
    XML = etree.tostring(
        etree.parse('data/input/old_stk_input.phyml', parser),
        pretty_print=True)
    # NOTE(review): the year is given as the string '2009' here, whereas the
    # other year-based tests in this file pass integers -- confirm that this
    # is intentional and not the reason the result is empty.
    searchTerms = {'taxa': ["Gallus gallus"], 'years': ['2009']}
    new_XML = create_subset(XML, searchTerms)
    root = _parse_xml(new_XML)

    names = sorted(s.attrib['name'] for s in root.findall(".//source"))
    expected_names = []
    self.assertListEqual(expected_names, names)

    src_trs = root.findall(".//source_tree")
    self.assertEqual(len(src_trs), 0)
def testRealDataCharMorphMol(self):
    """Subset the real data on morphological and molecular character types.

    Uses ``create_subset`` with its default search mode and expects only
    ``Bertelli_etal_2006`` with a single ``source_tree``.
    """
    XML = etree.tostring(
        etree.parse('data/input/old_stk_input.phyml', parser),
        pretty_print=True)
    searchTerms = {'character_types': ["morphological", "molecular"]}
    new_XML = create_subset(XML, searchTerms)
    root = _parse_xml(new_XML)

    names = sorted(s.attrib['name'] for s in root.findall(".//source"))
    expected_names = ["Bertelli_etal_2006"]
    self.assertListEqual(expected_names, names)

    src_trs = root.findall(".//source_tree")
    self.assertEqual(len(src_trs), 1)
def testRealDataCharCytb(self):
    """Subset the real data on the ``cytb`` character.

    Expects eight named sources and nine ``source_tree`` elements in the
    subset.
    """
    XML = etree.tostring(
        etree.parse('data/input/old_stk_input.phyml', parser),
        pretty_print=True)
    searchTerms = {'characters': ["cytb"]}
    new_XML = create_subset(XML, searchTerms)
    root = _parse_xml(new_XML)

    names = sorted(s.attrib['name'] for s in root.findall(".//source"))
    expected_names = [
        "Aleixo_2002",
        "Allende_etal_2001",
        "Aragon_etal_1999",
        "Baker_etal_2005",
        "Baker_etal_2006",
        "Baker_etal_2007a",
        "Baker_etal_2007b",
        "Barhoum_Burns_2002",
    ]
    self.assertListEqual(expected_names, names)

    # and nine source trees across those sources
    src_trs = root.findall(".//source_tree")
    self.assertEqual(len(src_trs), 9)
def testRealDataCharType(self):
    """Subset the real data on the ``morphological`` character type.

    Expects four named sources and seven ``source_tree`` elements in the
    subset.
    """
    XML = etree.tostring(
        etree.parse('data/input/old_stk_input.phyml', parser),
        pretty_print=True)
    searchTerms = {'character_types': ["morphological"]}
    new_XML = create_subset(XML, searchTerms)
    root = _parse_xml(new_XML)

    names = sorted(s.attrib['name'] for s in root.findall(".//source"))
    expected_names = [
        "Aliabadian_etal_2007",
        "Andersson_1999a",
        "Baptista_Visser_1999",
        "Bertelli_etal_2006",
    ]
    self.assertListEqual(expected_names, names)

    # and seven source trees across those sources
    src_trs = root.findall(".//source_tree")
    self.assertEqual(len(src_trs), 7)
def test_sort_data(self):
    """Check the order of sources after ``_sort_data`` has been applied.

    The ``name`` attributes of the ``source`` elements, read in document
    order from the tree returned by ``_sort_data``, must match the expected
    list exactly.
    """
    parsed = etree.parse('data/input/create_matrix.phyml', parser)
    XML = etree.tostring(parsed, pretty_print=True)
    xml_root = _sort_data(_parse_xml(XML))

    # Read the source names in the order they now appear in the document.
    find_sources = etree.XPath("//source")
    names = [source.attrib['name'] for source in find_sources(xml_root)]

    expected_names = ['Davis_2011', 'Hill_2011', 'Hill_Davis_2011']
    self.assertListEqual(names, expected_names)
def testRealDataYears(self):
    """Subset the real data by year 1999.

    Every ``year`` element in the subset must have an ``integer_value`` of
    1999, there must be four of them, and the sorted source names must match
    the expected list.
    """
    XML = etree.tostring(
        etree.parse('data/input/old_stk_input.phyml', parser),
        pretty_print=True)
    searchTerms = {'years': [1999]}
    new_XML = create_subset(XML, searchTerms)
    root = _parse_xml(new_XML)

    find = etree.XPath("//year")
    yrs = find(root)
    for y in yrs:
        self.assertEqual(int(y.xpath('integer_value')[0].text), 1999)
    # The number of year elements is just the length of the XPath result;
    # no hand-maintained counter is needed.
    self.assertEqual(len(yrs), 4)

    names = sorted(s.attrib['name'] for s in root.findall(".//source"))
    expected_names = [
        "Andersson_1999a",
        "Andersson_1999b",
        "Aragon_etal_1999",
        "Baptista_Visser_1999",
    ]
    self.assertListEqual(expected_names, names)
def testCharType(self):
    """Subset ``create_matrix.phyml`` on the ``molecular`` character type.

    Every ``character`` element in the subset must have ``type`` equal to
    ``"molecular"``, there must be two of them, and the subset must contain
    the two expected sources and two ``source_tree`` elements.
    """
    XML = etree.tostring(
        etree.parse('data/input/create_matrix.phyml', parser),
        pretty_print=True)
    searchTerms = {'character_types': ["molecular"]}
    new_XML = create_subset(XML, searchTerms)
    root = _parse_xml(new_XML)

    find = etree.XPath("//character")
    chrs = find(root)
    for c in chrs:
        self.assertEqual(c.attrib['type'], "molecular")
    # The number of character elements is the length of the XPath result.
    self.assertEqual(len(chrs), 2)

    names = sorted(s.attrib['name'] for s in root.findall(".//source"))
    expected_names = ["Davis_2011", "Hill_2011"]
    self.assertListEqual(expected_names, names)

    # and two source trees too
    src_trs = root.findall(".//source_tree")
    self.assertEqual(len(src_trs), 2)
def test_add_weights(self):
    """Add weights to a bunch of trees

    Each group of tree names is given the weight ``1 / len(group)`` via
    ``add_weights``; the ``tree/weight/real_value`` text of every named
    ``source_tree`` is then read back and compared with the expected values.
    """
    XML = etree.tostring(
        etree.parse('data/input/check_data_ind.phyml', parser),
        pretty_print=True)
    expected_idents = [
        ['Hill_Davis_2011_2', 'Hill_Davis_2011_1', 'Hill_Davis_2011_3'],
        ['Hill_Davis_2013_1', 'Hill_Davis_2013_2'],
    ]
    # so the first should end up with a weight of 0.33333 and the second
    # with 0.5
    for ei in expected_idents:
        weight = 1.0 / float(len(ei))
        XML = add_weights(XML, ei, weight)

    expected_weights = [
        str(1.0 / 3.0),
        str(1.0 / 3.0),
        str(1.0 / 3.0),
        str(0.5),
        str(0.5),
    ]

    # now check weights have been added to the correct part of the tree
    xml_root = _parse_xml(XML)
    # The document is not modified below, so the source_tree lookup is done
    # once instead of once per expected tree name.
    find = etree.XPath("//source_tree")
    trees = find(xml_root)

    weights_in_xml = []
    for ei in expected_idents:
        for tree_name in ei:
            for t in trees:
                if t.attrib['name'] == tree_name:
                    weights_in_xml.append(
                        t.xpath("tree/weight/real_value")[0].text)

    self.assertListEqual(expected_weights, weights_in_xml)
def test_import_data(self):
    """Import an old-style STK data directory and check the resulting PHYML.

    The imported document is written to a temporary file and validated
    against ``phylo_storage.rng``. Missing ``taxon`` and ``topology``
    elements are tolerated when no other kind of error is reported for the
    file. The project name, the number and names of the sources, and a
    couple of per-source details are then checked.
    """
    # Our validator object
    validator = SchemaValidator(rootDir="data")

    phyml = import_old_data('data/input/old_stk_test/', verbose=False)

    # mkstemp hands back an already-open descriptor: write through it so it
    # gets closed, and always delete the file, even if validation raises.
    temp_file_handle, temp_file = tempfile.mkstemp(suffix=".phyml")
    try:
        with os.fdopen(temp_file_handle, "w") as f:
            f.write(phyml)
        validator.ValidateOptionFile(
            os.path.join("../../../schema", "phylo_storage.rng"), temp_file)
        # The result is not used by this test; the call is kept as it was
        # part of the original validation sequence.
        validator.Passes()
        optionErrors = validator.OptionErrors()
        validator.Reset()
    finally:
        os.remove(temp_file)

    # We expect there to be missing taxon elements in the XML
    # as the user must fill these in, so check missing attributes, etc OK
    # and skip the missing element
    expected_missing = (
        "/phylo_storage/sources/source/source_tree/taxa_data/mixed_fossil_and_extant/taxon",
        "/phylo_storage/sources/source/source_tree/tree/topology",
    )
    failures = []
    for phyml_file in optionErrors:
        errors = optionErrors[phyml_file]
        added_eles = errors[1]
        for err in added_eles:
            if (err in expected_missing
                    and len(errors[0]) + len(errors[2]) + len(errors[3]) == 0):
                continue
            failures.append(phyml_file)
    # Comparing against an empty list makes the offending files appear in
    # the assertion message, replacing the separate print of the failures.
    self.assertEqual(failures, [])

    # parse XML and check various things
    XML = _parse_xml(phyml)
    name = XML.xpath('/phylo_storage/project_name/string_value')[0].text
    self.assertEqual(name, "old_stk_test")

    # check number of sources
    find = etree.XPath('//source')
    sources = find(XML)
    self.assertEqual(len(sources), 15)

    # check names of sources:
    expected_names = [
        'Allende_etal_2001',
        'Andersson_1999b',
        'Baker_etal_2006',
        'Aleixo_2002',
        'Bertelli_etal_2006',
        'Baker_etal_2007b',
        'Aragon_etal_1999',
        'Baker_etal_2007a',
        'Aliabadian_etal_2007',
        'Baker_Strauch_1988',
        'Barhoum_Burns_2002',
        'Barber_etal_2004',
        'Baker_etal_2005',
        'Andersson_1999a',
        'Baptista_etal_1999',
    ]
    for s in sources:
        name = s.attrib['name']
        self.assertIn(name, expected_names)
        if name == "Bertelli_etal_2006":
            # this source publication has three trees, let's check that is
            # the case!
            find = etree.XPath('source_tree')
            trees = find(s)
            self.assertEqual(len(trees), 3)
        if name == "Baptista_etal_1999":
            volume = s.xpath(
                './/bibliographic_information/article/volume/string_value'
            )[0].text
            self.assertEqual(volume, "140")