def test_bad_sampling(self):
    """
    Check bad-sampling simulation on one of the predefined trees.

    Characters (including the `k_hgt`/`th_hgt` parameters) are added to the
    last tree in `_TREES`; bad sampling is then simulated with a fixed seed.
    The test passes when the tree's `write()` output has changed and the
    SHA-256 digest of the resulting wordlist matches the recorded value.
    """
    # Recorded digest of the wordlist produced with seed "myseed".
    expected_digest = (
        b'\x8e\xe3\x9fzN\xbe0\xaa\xe2a\xc5\x854\x87>\xe6"s1?\xc1\x08YqM\xc4\xdd9Zh\xb37'
    )

    sampled = Tree(_TREES[-1])
    ngesh.add_characters(
        sampled, 10, k=4.0, th=1.0, z=1.045, k_hgt=2.0, th_hgt=1.1, seed="myseed"
    )

    # Snapshot taken before sampling so the change can be detected afterwards.
    snapshot_before = sampled.write()
    ngesh.simulate_bad_sampling(sampled, 0.5, seed="myseed")

    wordlist_text = str(ngesh.tree2wordlist(sampled))
    observed_digest = hashlib.sha256(wordlist_text.encode("utf-8")).digest()

    assert sampled.write() != snapshot_before
    assert observed_digest == expected_digest
def test_tree_output(self):
    """
    Check the NEXUS and wordlist output against recorded SHA-256 digests.

    Characters are added to every tree in `_TREES`, but only the first one is
    asserted on. The NEXUS output of a tree that never received characters is
    checked as well.
    """
    # Recorded digests for seed "myseed".
    expected_nexus = b"E\xf8\x97\xb6*\x7f\xf4_j\x89\x02dn\x1d\xbe\xb0\xb6\xcd\xd9.\xca:\x9ft\xe2m\xc5y\xa5\xaa\x0fa"
    expected_wordlist = b"\xb6\xc8i\xf4!\xf4l\x91\xb9\x8d\xb5Kae\x1aF\x9c\xfd\n \x06\xf2D\x1e<\xdd(U]6(\xbf"
    expected_nexus_nochar = b"A\x06N\x97n1iD\x01n\x07T\x99al\xa8)m\n\x93\x9ajKp\x07\x9bc\x8a\x03\x1a\x8d\xb7"

    def sha256_of(output):
        # Hash the UTF-8 encoding of the string form of `output`.
        return hashlib.sha256(str(output).encode("utf-8")).digest()

    # Add characters to all test trees.
    trees = []
    for newick in _TREES:
        trees.append(
            ngesh.add_characters(
                Tree(newick), 100, k=4.0, th=1.0, e=1.05, seed="myseed"
            )
        )

    # Only the first tree is asserted on.
    first_tree = trees[0]
    digest_nx = sha256_of(ngesh.tree2nexus(first_tree))
    digest_wl = sha256_of(ngesh.tree2wordlist(first_tree))
    assert digest_nx == expected_nexus
    assert digest_wl == expected_wordlist

    # NEXUS output for a tree without characters.
    bare_tree = Tree(_TREES[0])
    digest_nx_nochar = sha256_of(ngesh.tree2nexus(bare_tree))
    assert digest_nx_nochar == expected_nexus_nochar
def tree_test():
    """
    Build a seeded tree with characters and print it for manual inspection.

    Generates a tree with `ngesh.gen_tree`, adds characters to it, requests
    random words from `abzu.kiss.random_words`, and prints the tree, its
    NEXUS output, the per-leaf characters, the words, and every descendant
    node. Nothing is asserted and nothing is returned.
    """
    num_concepts = 10

    # Generate a fixed tree for the tests.
    tree = ngesh.gen_tree(1.0, 0.5, max_time=1.0, labels="human", seed=13)

    # TODO: need to add seed to `add_characters`
    # NOTE(review): a seed is already passed below -- this TODO may be stale.
    # TODO: need to sort the taxa in output in `tree2nexus`
    tree = ngesh.add_characters(tree, num_concepts, 3.0, 0.5, seed=13)

    # Map each leaf name to its characters.
    # TODO: reuse code? already in tree2nexus
    chars_by_taxon = {}
    for leaf in tree.get_leaves():
        chars_by_taxon[leaf.name] = leaf.chars

    # Number of words to generate: one more than the largest character value
    # found in any leaf, so all words that will be used are generated at the
    # beginning, following their path as if they always existed.
    # TODO: generate based on phonology of current available ones?
    highest_char = max(max(chars) for chars in chars_by_taxon.values())
    word_count = highest_char + 1
    words = abzu.kiss.random_words(word_count, param={})

    print(tree)
    print(ngesh.tree2nexus(tree))
    print(chars_by_taxon)
    print(words, len(words))

    for descendant in tree.iter_descendants():
        print([descendant.name], descendant.dist)
        print(dir(descendant))
def test_add_characters_with_hgt(self):
    """
    Check character addition with the `k_hgt`/`th_hgt` parameters set.

    Characters are added to the first tree in `_TREES` with a fixed seed, and
    the SHA-256 digest of the resulting wordlist is compared against the
    recorded value.
    """
    # Recorded digest of the wordlist produced with seed "myseed".
    expected_digest = b"\xc0\x84\x8a\xa8\x0b\xd6\xf8\x1b\x9c\xc7\xe6\xaf]\xe7\xee\x105\x989\x9b\xcfke\xe5\xf3\x03\xdc\x17U\x14 \xac"

    hgt_tree = Tree(_TREES[0])
    ngesh.add_characters(
        hgt_tree, 10, k=4.0, th=1.0, z=1.045, k_hgt=2.0, th_hgt=1.1, seed="myseed"
    )

    wordlist_text = str(ngesh.tree2wordlist(hgt_tree))
    observed_digest = hashlib.sha256(wordlist_text.encode("utf-8")).digest()

    assert observed_digest == expected_digest
def new_tree(args):
    """
    Generate and return a new tree from a namespace of parameters.

    Thin wrapper around `ngesh.gen_tree()` and `ngesh.add_characters()`,
    optionally followed by `ngesh.simulate_bad_sampling()`. It is intended
    for command-line usage, but it can also be called from code for quick
    prototyping.

    Parameters
    ----------
    args : Namespace
        Parameters for tree generation. The attributes read are `birth`,
        `death`, `min_leaves`, `max_time`, `labels`, `lam` and `seed`;
        `num_chars`, `k_mut`, `th_mut`, `k_hgt`, `th_hgt` and `e_mut` when
        `num_chars` is truthy; and `sampling`.

    Returns
    -------
    tree : ete3 object
        The randomly generated tree.
    """

    # Step 1: the random tree itself.
    generated = ngesh.gen_tree(
        args.birth,
        args.death,
        min_leaves=args.min_leaves,
        max_time=args.max_time,
        labels=args.labels,
        lam=args.lam,
        seed=args.seed,
    )

    # Step 2: characters, only when a truthy number of them was requested.
    if args.num_chars:
        generated = ngesh.add_characters(
            generated,
            args.num_chars,
            args.k_mut,
            args.th_mut,
            k_hgt=args.k_hgt,
            th_hgt=args.th_hgt,
            e=args.e_mut,
            seed=args.seed,
        )

    # Step 3: bad sampling, only when requested. The call's return value is
    # not used; the same tree object is returned afterwards.
    if args.sampling:
        ngesh.simulate_bad_sampling(generated, args.sampling, seed=args.seed)

    return generated
def test_add_characters(self):
    """
    Check character addition against a recorded SHA-256 digest.

    Characters are added to every tree in `_TREES` (for coverage), but only
    the wordlist of the first tree is hashed and compared.
    """
    # Recorded digest of the first tree's wordlist for seed "myseed".
    expected_digest = b"\xdc<\x1f\x10N\xbf\xcc\xc4|l26\x10\xfc\xbaN\xb7\\c\x8bB\xca\x95.\xcbH\x82T\xa3\xbd\xff\x15"

    concept_count = 10
    # Gamma parameters: shape (k), scale (th) and the "zipf" correction (z).
    gamma_params = {"k": 4.0, "th": 1.0, "z": 1.045}

    # Add characters to all trees, for coverage.
    trees = []
    for newick in _TREES:
        trees.append(
            ngesh.add_characters(
                Tree(newick), concept_count, seed="myseed", **gamma_params
            )
        )

    # Only the first tree is asserted on.
    wordlist_text = str(ngesh.tree2wordlist(trees[0]))
    observed_digest = hashlib.sha256(wordlist_text.encode("utf-8")).digest()

    assert observed_digest == expected_digest