def test_load_from_server(self):
    """Check that unusable server-loading arguments raise TypeError."""
    rejected_sources = (
        'xxx',  # not a valid use_data_from
        'oma',  # valid use_data_from, but query_database is missing
    )
    for source in rejected_sources:
        with self.assertRaises(TypeError):
            ham.Ham(use_data_from=source)
def test_wrong_type_hog_file(self):
    """Check that unsupported type_hog_file values raise TypeError."""
    for bad_type in ('xs', '', None):
        with self.assertRaises(TypeError):
            ham.Ham(self.nwk_str, self.orthoxml_path,
                    type_hog_file=bad_type)
def test_wrong_newick_str(self):
    """Check that empty and malformed newick strings raise NewickError."""
    newick_error = ete3.parser.newick.NewickError

    # Empty newick string.
    with self.assertRaises(newick_error):
        ham.Ham(self.nwk_str_empty, self.orthoxml_path,
                type_hog_file='orthoxml')

    # Malformed newick string.
    with self.assertRaises(newick_error):
        ham.Ham(self.nwk_str_wrong, self.orthoxml_path,
                type_hog_file='orthoxml')
def test_orthoxml_as_string(self):
    """Check orthoXML passed as a string with and without the flag.

    With ``orthoXML_as_string=True`` the construction must succeed; without
    the flag the same arguments must raise IOError.
    """
    shared_kwargs = {
        'tree_file': self.nwk_str,
        'hog_file': self.orthoxml_string,
        'type_hog_file': 'orthoxml',
    }

    self.ham_analysis = ham.Ham(orthoXML_as_string=True, **shared_kwargs)

    with self.assertRaises(IOError):
        self.ham_analysis = ham.Ham(**shared_kwargs)
def test_phyloxml_tag(self):
    """Check phyloxml_leaf_name_tag handling when loading a phyloxml tree."""
    phyloxml_kwargs = {
        'type_hog_file': 'orthoxml',
        'tree_format': 'phyloxml',
    }

    # The literal string 'None' is expected to be rejected as a leaf tag.
    with self.assertRaises(TypeError):
        ham.Ham(self.phyloxml_file, self.orthoxml_path,
                phyloxml_leaf_name_tag='None', **phyloxml_kwargs)

    # NOTE(review): the flattened source does not show whether this call was
    # nested inside the assertRaises block above; it is kept outside so that
    # it is actually executed - confirm against the original layout.
    ham.Ham(self.phyloxml_file, self.orthoxml_path,
            phyloxml_leaf_name_tag='clade_name', **phyloxml_kwargs)
def setUp(self):
    """Load the simpleEx fixture and cache genomes used by the tests.

    Stores the Ham analysis, five extant genomes looked up by name and four
    ancestral genomes looked up as the MRCA of a pair of extant genomes.
    """
    here = os.path.dirname(__file__)
    newick = utils.get_newick_string(
        os.path.join(here, './data/simpleEx.nwk'), type="nwk")
    hog_path = os.path.join(here, './data/simpleEx.orthoxml')

    self.ham_analysis = ham.Ham(tree_file=newick,
                                hog_file=hog_path,
                                type_hog_file='orthoxml',
                                use_internal_name=True)

    # Extant genomes: attribute name -> genome name in the fixture.
    extant_lookup = (
        ('human', "HUMAN"),
        ('frog', "XENTR"),
        ('mouse', "MOUSE"),
        ('rat', "RATNO"),
        ('chimp', "PANTR"),
    )
    for attribute, genome_name in extant_lookup:
        setattr(self, attribute,
                self.ham_analysis.get_extant_genome_by_name(
                    name=genome_name))

    # Ancestral genomes, each defined by the MRCA of two extant genomes.
    find_mrca = self.ham_analysis.get_ancestral_genome_by_mrca_of_genome_set
    self.vertebrates = find_mrca({self.human, self.frog})
    self.rodents = find_mrca({self.mouse, self.rat})
    self.primates = find_mrca({self.human, self.chimp})
    self.euarchontoglires = find_mrca({self.human, self.mouse})
def test_non_luca_root_hog_works_from_omabrowser(self):
    """Check the tree profile written for P53_RAT from OMA has 'treeData'."""
    oma_analysis = ham.Ham(query_database='P53_RAT', use_data_from='oma')

    profile_path = os.path.join(self.tmpdir, 'tree_profile.html')
    oma_analysis.create_tree_profile(outfile=profile_path)

    with open(profile_path, 'rt') as profile:
        page = profile.read()
    self.assertIn('treeData', page)
def setUp(self):
    """Create a Ham analysis on simpleEx with use_internal_name enabled."""
    here = os.path.dirname(__file__)
    newick = utils.get_newick_string(
        os.path.join(here, './data/simpleEx.nwk'), type="nwk")
    hog_path = os.path.join(here, './data/simpleEx.orthoxml')

    self.ham_analysis = ham.Ham(tree_file=newick,
                                hog_file=hog_path,
                                type_hog_file='orthoxml',
                                use_internal_name=True)
def setUp(self):
    """Create a Ham analysis from the p53 phyloxml tree and orthoxml file.

    The two fixture paths are kept on the instance as ``phyloxml_path`` and
    ``orthoxml_path``.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data")
    self.phyloxml_path = os.path.join(
        data_dir, "p53_augmented_speciestree.phyloxml")
    self.orthoxml_path = os.path.join(data_dir, "p53_augmented.orthoxml")

    self.ham_analysis = ham.Ham(
        tree_file=self.phyloxml_path,
        tree_format="phyloxml",
        hog_file=self.orthoxml_path,
        type_hog_file="orthoxml",
        use_internal_name=True,
    )
def test_parser_accepts_multiple_consequative_dups(self):
    """Check the parsed top-level HOG and the duplication flags of its children.

    Expects a single top-level HOG with five children: the first child did
    not arise by duplication and the remaining four share one
    ``arose_by_duplication`` value.
    """
    analysis = ham.Ham(tree_file=self.nwk_str,
                       hog_file=self.orthoxml_path,
                       type_hog_file='orthoxml',
                       use_internal_name=True)

    top_level = analysis.get_list_top_level_hogs()
    self.assertEqual(1, len(top_level))

    root_hog = top_level[0]
    self.assertEqual(5, len(root_hog.children))
    self.assertFalse(root_hog.children[0].arose_by_duplication)

    # All remaining children must point at the same duplication event.
    distinct_events = {
        child.arose_by_duplication for child in root_hog.children[1:]
    }
    self.assertEqual(1, len(distinct_events))
def test_treeProfile_on_full_setup(self):
    """Configure INFO logging and build a Ham analysis on simpleEx."""
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s",
    )

    here = os.path.dirname(__file__)
    newick = utils.get_newick_string(
        os.path.join(here, './data/simpleEx.nwk'), type="nwk")
    hog_path = os.path.join(here, './data/simpleEx.orthoxml')

    # Only the construction itself is exercised in the visible part of
    # this test; the resulting object is not inspected further here.
    ham_analysis = ham.Ham(tree_file=newick,
                           hog_file=hog_path,
                           type_hog_file='orthoxml')
def setUp(self):
    """Create a Ham analysis from the parser 'conflict_duplication_children' fixture."""
    parser_dir = os.path.dirname(__file__)
    newick = utils.get_newick_string(
        os.path.join(parser_dir, './data/parser/tree.newick'), type="nwk")
    hog_path = os.path.join(
        parser_dir, './data/parser/conflict_duplication_children.orthoxml')

    self.ham_analysis = ham.Ham(tree_file=newick,
                                hog_file=hog_path,
                                type_hog_file='orthoxml',
                                use_internal_name=True)
def setUp(self):
    """Build unfiltered and HOG-2-filtered Ham objects plus expected id sets.

    ``self.h`` and ``self.hn`` are unfiltered analyses, ``self.hf`` is
    restricted to HOG 2 through a ParserFilter. The ``filter_*`` and
    ``no_filter_*`` attributes hold the genome, gene and HOG identifiers the
    tests compare against.
    """
    here = os.path.dirname(__file__)

    named_tree = utils.get_newick_string(
        os.path.join(here, './data/simpleEx.nwk'), type="nwk")
    # NOTE(review): this loads the very same file as above although the
    # variable is meant for a tree named only at the leaves - confirm that
    # no dedicated fixture was intended.
    leaf_only_tree = utils.get_newick_string(
        os.path.join(here, './data/simpleEx.nwk'), type="nwk")
    hog_path = os.path.join(here, './data/simpleEx.orthoxml')

    # Newick with names on both internal nodes and leaves.
    self.h = ham.Ham(named_tree, hog_path)

    # Newick intended to carry names only at the leaves.
    self.hn = ham.Ham(leaf_only_tree, hog_path)

    # Identifiers expected to be kept when filtering on HOG 2 ...
    self.filter_genome = {"HUMAN", "MOUSE", "CANFA", "PANTR"}
    self.filter_genes = {'2', '32', '22', '12'}
    self.filter_genes_ext = {'HUMAN2', 'MOUSE2', 'CANFA2', 'PANTR2'}
    self.filter_hogs = {'2'}

    # ... and the identifiers that fall outside of that filter.
    self.no_filter_genome = {"XENTR", "RATNO"}
    self.no_filter_genes = {
        '1', '11', '21', '31', '41', '51',
        '3', '13', '23', '33', '53',
        '34', '14',
    }
    self.no_filter_genes_ext = {
        'HUMAN1', 'PANTR1', 'CANFA1', 'MOUSE1', 'RATNO1', 'XENTR1',
        'HUMAN3', 'PANTR3', 'CANFA3', 'MOUSE3', 'XENTR3',
        'MOUSE4', 'PANTR4',
    }
    self.no_filter_hogs = {'1', '3'}

    # Same named newick, restricted to HOG 2 by a parser filter.
    hog2_filter = ham.ParserFilter()
    hog2_filter.add_hogs_via_hogId([2])
    self.hf = ham.Ham(named_tree, hog_path, filter_object=hog2_filter)
def setUp(self):
    """Load simpleEx and cache its top-level HOG and extant gene dicts."""
    here = os.path.dirname(__file__)
    newick = utils.get_newick_string(
        os.path.join(here, './data/simpleEx.nwk'), type="nwk")
    hog_path = os.path.join(here, './data/simpleEx.orthoxml')

    analysis = ham.Ham(tree_file=newick,
                       hog_file=hog_path,
                       type_hog_file='orthoxml',
                       use_internal_name=True)

    self.ham_analysis = analysis
    self.hogs = analysis.get_dict_top_level_hogs()
    self.genes = analysis.get_dict_extant_genes()
def test_load_taxonomy_from_nwk_file_and_all_hogs_from_orthoxml_file_no_filter(
        self):
    """Load simpleEx without any filter and check taxonomy, HOGs and genomes."""
    # Set up logging.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s",
    )

    here = os.path.dirname(__file__)

    # The newick file is the taxonomy reference; read it as a string.
    newick = utils.get_newick_string(
        os.path.join(here, './data/simpleEx.nwk'), type="nwk")

    # The orthoXML file provides the HOGs.
    hog_path = os.path.join(here, './data/simpleEx.orthoxml')

    # The Ham object is the entry point for every analysis.
    analysis = ham.Ham(tree_file=newick,
                       hog_file=hog_path,
                       use_internal_name=True,
                       type_hog_file='orthoxml')

    # The taxonomy keeps the newick string it was built from.
    expected_tree = (
        "(XENTR,(((HUMAN,PANTR)Primates,(MOUSE,RATNO)Rodents)Euarchontoglires,"
        "CANFA)Mammalia)Vertebrata;"
    )
    self.assertEqual(analysis.taxonomy.tree_str, expected_tree)

    # Every top-level HOG and every extant gene has been loaded.
    self.assertEqual(len(analysis.top_level_hogs), 3)
    self.assertEqual(len(analysis.extant_gene_map), 19)

    # All six species of the taxonomy are present as extant genomes.
    found_names = {
        genome.name for genome in analysis.get_list_extant_genomes()
    }
    self.assertEqual(
        found_names,
        {'RATNO', 'HUMAN', 'CANFA', 'PANTR', 'XENTR', 'MOUSE'})

    self.assertEqual(len(analysis.taxonomy.leaves), 6)
    self.assertEqual(len(analysis.taxonomy.internal_nodes), 5)
def test_lineage_comparative_analysis(self):
    """Compare MOUSE vertically against the MOUSE/XENTR ancestor on simpleEx.

    Checks the retained, duplicated and lost entries reported by the
    vertical comparison.
    """
    # Initialise the pyham analysis as described in the documentation.
    logging.basicConfig(
        level=logging.WARNING,
        format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s",
    )

    here = os.path.dirname(__file__)
    newick = utils.get_newick_string(
        os.path.join(here, './data/simpleEx.nwk'), type="nwk")
    hog_path = os.path.join(here, './data/simpleEx.orthoxml')

    analysis = ham.Ham(tree_file=newick,
                       hog_file=hog_path,
                       type_hog_file='orthoxml',
                       use_internal_name=True)

    # Select the genomes of interest: two extant genomes by name and the
    # ancestral genome at their most recent common ancestor.
    mouse_genome = analysis.get_extant_genome_by_name(name="MOUSE")
    frog_genome = analysis.get_extant_genome_by_name(name="XENTR")
    vertebrate_genome = analysis.get_ancestral_genome_by_mrca_of_genome_set(
        {mouse_genome, frog_genome})

    # Compare the extant mouse genome with the ancestral genome.
    vertical_map = analysis.compare_genomes_vertically(
        mouse_genome, vertebrate_genome)

    # HOGs that stayed single copy between the two levels.
    retained = _str_dict_one_value(vertical_map.get_retained())
    self.assertDictEqual({'<HOG(1)>': 'Gene(31)'}, retained)

    # HOGs whose genes were duplicated.
    duplicated = _str_dict_array_value(vertical_map.get_duplicated())
    self.assertDictEqual({'<HOG(3)>': {'Gene(34)', 'Gene(33)'}}, duplicated)

    # Nothing is expected to be lost.
    lost = _str_array(vertical_map.get_lost())
    self.assertSetEqual(set(), lost)
def test_load_taxonomy_from_nwk_file_and_from_orthoxml_file_with_filter_hog2(
        self):
    """Load simpleEx through a ParserFilter on HOG 2 and check what is kept."""
    # Set up logging.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s",
    )

    here = os.path.dirname(__file__)

    # The newick file is the taxonomy reference; read it as a string.
    newick = utils.get_newick_string(
        os.path.join(here, './data/simpleEx.nwk'), type="nwk")

    # Build the filter: keep only HOG 2.
    hog_filter = ham.ParserFilter()
    hog_filter.add_hogs_via_hogId([2])

    # Before parsing, only the HOG id filter is populated.
    self.assertEqual(set(hog_filter.HOGId_filter), {'2'})
    self.assertEqual(set(hog_filter.GeneExtId_filter), set())
    self.assertEqual(set(hog_filter.GeneIntId_filter), set())

    # The orthoXML file provides the HOGs.
    hog_path = os.path.join(here, './data/simpleEx.orthoxml')

    # Build the Ham object with the filter attached.
    analysis = ham.Ham(newick, hog_path,
                       filter_object=hog_filter,
                       use_internal_name=True)
    self.assertEqual(hog_filter, analysis.filter_obj)

    # The filter resolved to the expected gene and HOG ids.
    resolved = analysis.filter_obj
    self.assertSetEqual(set(resolved.geneUniqueId), {'2', '32', '22', '12'})
    self.assertSetEqual(set(resolved.hogsId), {'2'})

    # Only the filtered content has been parsed.
    self.assertEqual(len(analysis.top_level_hogs), 1)
    self.assertEqual(len(analysis.extant_gene_map), 4)
def setUp(self):
    """Load simpleEx and cache HOGs, genes, extant genomes and one ancestor."""
    here = os.path.dirname(__file__)
    newick = utils.get_newick_string(
        os.path.join(here, './data/simpleEx.nwk'), type="nwk")
    hog_path = os.path.join(here, './data/simpleEx.orthoxml')

    self.ham_analysis = ham.Ham(tree_file=newick,
                                hog_file=hog_path,
                                type_hog_file='orthoxml')

    self.hogs = self.ham_analysis.get_dict_top_level_hogs()
    self.genes = self.ham_analysis.get_dict_extant_genes()

    # Extant genomes: attribute name -> genome name in the fixture.
    extant_lookup = (
        ('human', "HUMAN"),
        ('frog', "XENTR"),
        ('mouse', "MOUSE"),
        ('rat', "RATNO"),
        ('chimp', "PANTR"),
    )
    for attribute, genome_name in extant_lookup:
        setattr(self, attribute,
                self.ham_analysis.get_extant_genome_by_name(
                    name=genome_name))

    # Ancestral genome at the MRCA of HUMAN and XENTR.
    self.vertebrates = (
        self.ham_analysis.get_ancestral_genome_by_mrca_of_genome_set(
            {self.human, self.frog}))
def setUp(self):
    """Load the Conflict1 fixture and keep its first top-level HOG."""
    fixture_root = os.path.dirname(__file__)
    newick = utils.get_newick_string(
        os.path.join(fixture_root, './data/Conflict1/tree.nwk'), type="nwk")
    hog_path = os.path.join(fixture_root, './data/Conflict1/hog.orthoxml')

    self.ham_analysis = ham.Ham(tree_file=newick,
                                hog_file=hog_path,
                                type_hog_file='orthoxml',
                                use_internal_name=True)

    top_level = self.ham_analysis.get_list_top_level_hogs()
    self.hog = top_level[0]
def setUp(self):
    """Create ``self.h``, a Ham object on simpleEx with use_internal_name enabled."""
    here = os.path.dirname(__file__)
    newick = utils.get_newick_string(
        os.path.join(here, './data/simpleEx.nwk'), type="nwk")
    hog_path = os.path.join(here, './data/simpleEx.orthoxml')

    self.h = ham.Ham(newick, hog_path, use_internal_name=True)
def test_wrong_filter(self):
    """Check that a plain string passed as filter_object raises TypeError."""
    not_a_filter = "x"
    with self.assertRaises(TypeError):
        ham.Ham(self.nwk_str, self.orthoxml_path,
                filter_object=not_a_filter)
def setUp(self):
    """Build two Ham analyses on simpleEx and the expected count tables.

    ``ham_analysis`` is created with ``use_internal_name=True`` and
    ``ham_analysis_no_name`` with ``use_internal_name=False``. The
    ``exp_full*`` and ``expected_level_*`` dicts map a node name to a list
    of expected counts; the column order is given above each group.
    """
    here = os.path.dirname(__file__)
    newick = utils.get_newick_string(
        os.path.join(here, './data/simpleEx.nwk'), type="nwk")
    hog_path = os.path.join(here, './data/simpleEx.orthoxml')

    common_kwargs = {
        'tree_file': newick,
        'hog_file': hog_path,
        'type_hog_file': 'orthoxml',
    }
    self.ham_analysis = ham.Ham(use_internal_name=True, **common_kwargs)
    self.ham_analysis_no_name = ham.Ham(use_internal_name=False,
                                        **common_kwargs)

    # Columns: gene, retained, duplicated, gain, lost
    self.exp_full = {
        "Mammalia": [3, 2, 0, 1, 0],
        "Euarchontoglires": [4, 2, 2, 0, 0],
        "Primates": [4, 4, 0, 0, 0],
        "Rodents": [4, 4, 0, 0, 0],
        "Vertebrata": [2, None, None, None, None],
    }
    self.exp_full_nn = {
        "HUMAN/PANTR/MOUSE/RATNO/CANFA": [3, 2, 0, 1, 0],
        "HUMAN/PANTR/MOUSE/RATNO": [4, 2, 2, 0, 0],
        "HUMAN/PANTR": [4, 4, 0, 0, 0],
        "MOUSE/RATNO": [4, 4, 0, 0, 0],
        "XENTR/HUMAN/PANTR/MOUSE/RATNO/CANFA": [2, None, None, None, None],
        "HUMAN": [4, 3, 0, 1, 1],
        "PANTR": [4, 4, 0, 0, 0],
        "MOUSE": [4, 4, 0, 0, 0],
        "RATNO": [2, 1, 0, 1, 3],
        "CANFA": [3, 3, 0, 0, 0],
        "XENTR": [2, 2, 0, 0, 0],
    }

    # Columns: gene, retained, duplicated, lost
    self.expected_level_1 = {
        "Mammalia": [1, 1, 0, 0],
        "Euarchontoglires": [1, 1, 0, 0],
        "Primates": [1, 1, 0, 0],
        "Rodents": [1, 1, 0, 0],
        "Vertebrata": [1, None, None, None],
        "HUMAN": [1, 1, 0, 0],
        "PANTR": [1, 1, 0, 0],
        "MOUSE": [1, 1, 0, 0],
        "RATNO": [1, 1, 0, 0],
        "CANFA": [1, 1, 0, 0],
        "XENTR": [1, 1, 0, 0],
    }
    self.expected_level_2 = {
        "Mammalia": [1, None, None, None],
        "Euarchontoglires": [1, 1, 0, 0],
        "Primates": [1, 1, 0, 0],
        "Rodents": [1, 1, 0, 0],
        "HUMAN": [1, 1, 0, 0],
        "PANTR": [1, 1, 0, 0],
        "MOUSE": [1, 1, 0, 0],
        "RATNO": [0, 0, 0, 1],
        "CANFA": [1, 1, 0, 0],
    }
    self.expected_level_3 = {
        "Mammalia": [1, 1, 0, 0],
        "Euarchontoglires": [2, 0, 2, 0],
        "Primates": [2, 2, 0, 0],
        "Rodents": [2, 2, 0, 0],
        "Vertebrata": [1, None, None, None],
        "HUMAN": [1, 1, 0, 1],
        "PANTR": [2, 2, 0, 0],
        "MOUSE": [2, 2, 0, 0],
        "RATNO": [0, 0, 0, 2],
        "CANFA": [1, 1, 0, 0],
        "XENTR": [1, 1, 0, 0],
    }
def setUp(self):
    """Build Ham objects from newick, phyloxml and string inputs.

    Creates unfiltered analyses (``h``, ``hn``, ``hpx``, ``hstring``) and
    analyses restricted to HOG 2 (``hf``, ``hfstring``), stores the raw
    orthoXML text in ``orthoxml_string`` and defines the identifier sets the
    tests compare against.
    """
    here = os.path.dirname(__file__)

    named_tree = utils.get_newick_string(
        os.path.join(here, './data/simpleEx.nwk'), type="nwk")
    # NOTE(review): this loads the very same file as above although the
    # variable is meant for a tree named only at the leaves - confirm that
    # no dedicated fixture was intended.
    leaf_only_tree = utils.get_newick_string(
        os.path.join(here, './data/simpleEx.nwk'), type="nwk")
    phyloxml_path = os.path.join(here, './data/simpleEx.phyloxml')
    hog_path = os.path.join(here, './data/simpleEx.orthoxml')

    # Keep the orthoXML content for the string-based constructions below.
    with open(hog_path, 'r') as handle:
        self.orthoxml_string = handle.read()

    # Newick with names on both internal nodes and leaves.
    self.h = ham.Ham(named_tree, hog_path, use_internal_name=True)

    # Newick intended to carry names only at the leaves.
    self.hn = ham.Ham(leaf_only_tree, hog_path)

    # Phyloxml file with explicit name tags.
    self.hpx = ham.Ham(
        phyloxml_path,
        hog_path,
        use_internal_name=True,
        tree_format='phyloxml',
        phyloxml_internal_name_tag='taxonomy_scientific_name',
        phyloxml_leaf_name_tag='taxonomy_code',
    )

    # Identifiers expected to be kept when filtering on HOG 2 ...
    self.filter_genome = {"HUMAN", "MOUSE", "CANFA", "PANTR"}
    self.filter_genes = {'2', '32', '22', '12'}
    self.filter_genes_ext = {'HUMAN2', 'MOUSE2', 'CANFA2', 'PANTR2'}
    self.filter_hogs = {'2'}

    # ... and the identifiers that fall outside of that filter.
    self.no_filter_genome = {"XENTR", "RATNO"}
    self.no_filter_genes = {
        '1', '11', '21', '31', '41', '51',
        '3', '13', '23', '33', '53',
        '34', '14',
    }
    self.no_filter_genes_ext = {
        'HUMAN1', 'PANTR1', 'CANFA1', 'MOUSE1', 'RATNO1', 'XENTR1',
        'HUMAN3', 'PANTR3', 'CANFA3', 'MOUSE3', 'XENTR3',
        'MOUSE4', 'PANTR4',
    }
    self.no_filter_hogs = {'1', '3'}

    # Named newick restricted to HOG 2 by a parser filter.
    hog2_filter = ham.ParserFilter()
    hog2_filter.add_hogs_via_hogId([2])
    self.hf = ham.Ham(named_tree, hog_path,
                      filter_object=hog2_filter,
                      use_internal_name=True)

    # OrthoXML supplied as a string, without a filter.
    self.hstring = ham.Ham(named_tree, self.orthoxml_string,
                           use_internal_name=True,
                           orthoXML_as_string=True)

    # OrthoXML supplied as a string, combined with the HOG 2 filter.
    self.hfstring = ham.Ham(named_tree, self.orthoxml_string,
                            filter_object=hog2_filter,
                            use_internal_name=True,
                            orthoXML_as_string=True)