def get_nexus_reader(thing):
    """Return a ``NexusReader`` for *thing*.

    Accepted inputs, checked in this order:

    * ``str`` -- passed to ``NexusReader.from_string``.
    * ``pathlib.Path`` -- passed to ``NexusReader.from_file``.
    * ``NexusWriter`` -- its ``write()`` output is passed to
      ``NexusReader.from_string``.
    * ``NexusReader`` -- returned unchanged.

    Raises:
        AssertionError: if *thing* is none of the types above.
    """
    if isinstance(thing, str):
        return NexusReader.from_string(thing)
    if isinstance(thing, pathlib.Path):
        return NexusReader.from_file(thing)
    if isinstance(thing, NexusWriter):
        return NexusReader.from_string(thing.write())
    if not isinstance(thing, NexusReader):
        # Raised explicitly (rather than via ``assert``) so the check still
        # runs under ``python -O``; the exception type is kept as
        # AssertionError for backward compatibility.
        raise AssertionError(
            'Cannot get a NexusReader from object of type {0}'.format(
                type(thing).__name__))
    return thing
def test_combine():
    """Combining two tree-only readers keeps all three trees in input order."""
    first = NexusReader.from_string("""Begin trees; tree 1 = (a,b,c); end;""")
    second = NexusReader.from_string("""Begin trees; tree 2 = (b,a,c); tree 3 = (b,c,a); end;""")
    combined = combine_nexuses([first, second])

    expected_trees = [
        "tree 1 = (a,b,c);",
        "tree 2 = (b,a,c);",
        "tree 3 = (b,c,a);",
    ]
    assert len(combined.trees) == 3
    for position, tree in enumerate(expected_trees):
        assert combined.trees[position] == tree
def test_tally_by_taxon():
    """``tally_by_taxon`` lists, per taxon and state, the site indices."""
    reader = NexusReader.from_string("""Begin data; Dimensions ntax=3 nchar=6; Format datatype=standard symbols="12" gap=-; Matrix Harry 0111-? Simon 0011-? Elvis 0001-? ;""")
    tally = tally_by_taxon(reader)

    # For every state: the sites at which each taxon carries that state.
    expected_by_state = [
        ('0', [('Harry', [0]), ('Simon', [0, 1]), ('Elvis', [0, 1, 2])]),
        ('1', [('Harry', [1, 2, 3]), ('Simon', [2, 3]), ('Elvis', [3])]),
        ('-', [('Harry', [4]), ('Simon', [4]), ('Elvis', [4])]),
        ('?', [('Harry', [5]), ('Simon', [5]), ('Elvis', [5])]),
    ]
    for state, per_taxon in expected_by_state:
        for taxon, sites in per_taxon:
            assert tally[taxon][state] == sites
def test_find_unique_sites_2():
    """``iter_unique_sites`` yields sites 3, 5 and 6 and none of 0, 1, 2, 4."""
    nexus = NexusReader.from_string("""Begin data; Dimensions ntax=4 nchar=7; Format datatype=standard symbols="01" gap=-; Matrix Harry 10000?- Simon 1100011 Betty 1110000 Louise 1111000 ;""")
    unique = list(iter_unique_sites(nexus))

    # Site 1 (3x1 and 1x0) checks that a site with ONE absent taxon is
    # ignored; sites 0 and 2 must not be reported either, nor the constant
    # site 4.
    for site in (1, 0, 2, 4):
        assert site not in unique

    # Site 3 is a simple unique site; sites 5 and 6 check that missing data
    # is handled appropriately.
    for site in (3, 5, 6):
        assert site in unique
def test_tally_by_site():
    """``tally_by_site`` groups taxa under the state they show at each site."""
    reader = NexusReader.from_string("""Begin data; Dimensions ntax=3 nchar=6; Format datatype=standard symbols="12" gap=-; Matrix Harry 0111-? Simon 0011-? Elvis 0001-? ;""")
    tally = tally_by_site(reader)

    taxa = ('Harry', 'Simon', 'Elvis')
    # One entry per site; each character is the state expected for the taxon
    # at the same position in ``taxa``.
    columns = ['000', '100', '110', '111', '---', '???']
    for site, column in enumerate(columns):
        for taxon, state in zip(taxa, column):
            assert taxon in tally[site][state]
def test_combine_with_character_labels():
    """Combining two labelled matrices concatenates sites and renumbers labels.

    The written output must report NTAX=3, NCHAR=6 and the merged symbol
    set, carry ``123456`` for each taxon, and list the character labels as
    ``<running index> <nexus id>.char<n>`` in the character block.
    """
    left = NexusReader.from_string(""" BEGIN DATA; DIMENSIONS NTAX=3 NCHAR=3; FORMAT DATATYPE=STANDARD MISSING=0 GAP=- SYMBOLS="123"; CHARSTATELABELS 1 char1, 2 char2, 3 char3 ; MATRIX Tax1 123 Tax2 123 Tax3 123 ; """)
    right = NexusReader.from_string(""" BEGIN DATA; DIMENSIONS NTAX=3 NCHAR=3; FORMAT DATATYPE=STANDARD MISSING=0 GAP=- SYMBOLS="456"; CHARSTATELABELS 1 char1, 2 char2, 3 char3 ; MATRIX Tax1 456 Tax2 456 Tax3 456 ; """)
    newnex = combine_nexuses([left, right])

    # Header values of the combined matrix.
    assert re.search(r"""\bNTAX=3\b""", newnex.write())
    assert re.search(r"""\bNCHAR=6\b""", newnex.write())
    assert re.search(r'\sSYMBOLS="123456"[\s;]', newnex.write())

    # Every taxon carries the sites of both inputs, in input order.
    for tax in [1, 2, 3]:
        assert re.search(r"""\bTax%d\s+123456\b""" % tax, newnex.write())

    # Labels are numbered 1..6 across both inputs.
    label_sources = [
        (nex_id, char_id) for nex_id in [1, 2] for char_id in [1, 2, 3]
    ]
    for counter, (nex_id, char_id) in enumerate(label_sources, 1):
        pattern = r"""\b%d\s+%d.char%d\b""" % (counter, nex_id, char_id)
        assert re.search(pattern, newnex.write(charblock=True))
def test_error_on_too_many_states():
    """``multistatise`` raises ValueError for a taxon with 30 sites set to 1."""
    source = """ Begin data; Dimensions ntax=1 nchar=30; Format datatype=standard symbols="01" gap=-; Matrix A 111111111111111111111111111111 ;"""
    reader = NexusReader.from_string(source)
    with pytest.raises(ValueError):
        multistatise(reader)
def nex():
    """Return the multistatised form of a four-taxon, four-site binary matrix."""
    source = """ Begin data; Dimensions ntax=4 nchar=4; Format datatype=standard symbols="01" gap=-; Matrix Harry 1000 Simon 0100 Betty 0010 Louise 0001 ;"""
    return multistatise(NexusReader.from_string(source))
def nex():
    """Return the binarised form of a labelled three-taxon, two-site matrix."""
    source = """ Begin data; Dimensions ntax=3 nchar=2; Format datatype=standard symbols="01" gap=-; Charstatelabels 1 char1, 2 char2; Matrix Maori 14 Dutch 25 Latin 36 ;"""
    return binarise(NexusReader.from_string(source))
def nex():
    """Return a reader over a four-taxon, eight-site matrix.

    The matrix text includes a bracketed ``[ 01234567]`` ruler as well as
    gap (``-``) and missing (``?``) entries.
    """
    return NexusReader.from_string(""" Begin data; Dimensions ntax=4 nchar=8; Format datatype=standard symbols="01" gap=-; Matrix [ 01234567] Harry 01000000 Simon 0010000- Betty 00010-0? Louise 000010?0 ;""")
def nex1():
    """Return a two-taxon, one-site reader whose ``short_filename`` is '0'."""
    reader = NexusReader.from_string("""Begin data; Dimensions ntax=2 nchar=1; Format datatype=standard symbols="12" gap=-; Matrix Harry 1 Simon 2 ;""")
    # Overriding short_filename exercises that feature: should
    # `combine_nexuses` ignore `short_filename`, the characters from this
    # reader would be identified as 1.xx instead of 0.xx.
    reader.short_filename = '0'
    return reader
def test_count_binary_set_size():
    """The tally from ``count_binary_set_size`` is 1 at each key 0 to 3."""
    reader = NexusReader.from_string("""Begin data; Dimensions ntax=3 nchar=4; Format datatype=standard symbols="12" gap=-; Matrix Harry 0111 Simon 0011 Elvis 0001 ;""")
    tally = count_binary_set_size(reader)
    for key in range(4):
        assert tally[key] == 1
def test_count_other_values_two():
    """``count_site_values`` counts the requested 'A' and 'B' states per taxon."""
    nexus = NexusReader.from_string("""#NEXUS Begin data; Dimensions ntax=5 nchar=3; Format datatype=standard symbols="01" gap=-; Matrix Harry 0A0 [No missing] Simon 0AB [one missing] Peter 0-B [one gap] Betty ?-1 [one gap and one missing = 2 missing] Louise ??? [three missing] ; End; """)
    expected = {
        'Harry': 1,
        'Simon': 2,
        'Peter': 1,
        'Betty': 0,
        'Louise': 0,
    }
    observed = count_site_values(nexus, ['A', 'B'])
    for name in observed:
        assert observed[name] == expected[name]
def test_count_missing_two():
    """With default arguments ``count_site_values`` counts '?' and '-' sites."""
    nexus = NexusReader.from_string("""#NEXUS Begin data; Dimensions ntax=5 nchar=3; Format datatype=standard symbols="01" gap=-; Matrix Harry 010 [No missing] Simon 0?0 [one missing] Peter 0-0 [one gap] Betty ?-1 [one gap and one missing = 2 missing] Louise ??? [three missing] ; End; """)
    expected = {
        'Harry': 0,
        'Simon': 1,
        'Peter': 1,
        'Betty': 2,
        'Louise': 3,
    }
    observed = count_site_values(nexus)
    for name in observed:
        assert observed[name] == expected[name]
def get_reader(args, many=False, required_blocks=None):
    """Build NexusReader object(s) from the parsed command-line *args*.

    Args:
        args: object with a ``filename`` attribute; an iterable of names
            when *many* is true, otherwise a single name. A name of
            ``None`` means the NEXUS text is read from standard input.
        many: when true, return a list with one reader per name; otherwise
            return the single reader.
        required_blocks: optional iterable of attribute names that must be
            present and truthy on every reader.

    Raises:
        ParserError: if a reader lacks one of the *required_blocks*.
    """
    names = args.filename if many else [args.filename]
    readers = [
        NexusReader.from_string(sys.stdin.read()) if name is None
        else NexusReader.from_file(name)
        for name in names
    ]

    if required_blocks:
        for reader in readers:
            for block in required_blocks:
                if getattr(reader, block, None):
                    continue
                message = 'Nexus file {0} has no {1} block'.format(
                    reader.filename, block)
                raise ParserError(colored(message, 'red', attrs=['bold']))

    if many:
        return readers
    return readers[0]
def test_regression_include_invisible_taxa():
    """Taxa that have no entries must still be included after multistatising.

    Every taxon except 'Zarathrustra' is named so that its first letter is
    the state expected at its first site; 'Zarathrustra' has an all-zero row
    and is expected to end up with '?'.
    """
    data = """ #NEXUS BEGIN DATA; DIMENSIONS NTAX=15 NCHAR=7; FORMAT DATATYPE=STANDARD MISSING=? GAP=- INTERLEAVE=YES; MATRIX Gertrude 0000001 Debbie 0001000 Zarathrustra 0000000 Christie 0010000 Benny 0100000 Bertha 0100000 Craig 0010000 Fannie-May 0000010 Charles 0010000 Annik 1000000 Frank 0000010 Amber 1000000 Andreea 1000000 Edward 0000100 Donald 0001000 ; END; """
    msnex = multistatise(NexusReader.from_string(data))

    for taxon, sites in msnex.data.matrix.items():
        if taxon[0] != 'Z':  # the 'Z' taxon is verified separately below
            # The initial of the taxon name is the expected character state.
            assert taxon[0] == sites[0], \
                "%s should be %s not %s" % (taxon, taxon[0], sites[0])

    # The taxon without any entries is kept and coded as missing.
    assert 'Zarathrustra' in msnex.data.matrix
    assert msnex.data.matrix['Zarathrustra'][0] == '?'
def test_to_binary_alphabetical():
    """Nexus -> binary conversion when the states are alphabetical symbols."""
    source = NexusReader.from_string(""" #NEXUS BEGIN DATA; DIMENSIONS NTAX=5 NCHAR=2; FORMAT MISSING=? GAP=- SYMBOLS="ABCDE"; CHARSTATELABELS 1 ALL, 2 ASHES ; MATRIX Mehri AB Geto AB Walani A- Hebrew A(C,D) Soqotri BC ; END; """)
    binary = binarise(source)
    nexus = binary.make_nexus(charblock=True, interleave=False)

    expected_patterns = [
        # dimensions of the binarised matrix
        r"\s+NCHAR=5;",
        # one label per (character, state) pair
        r"1\s+ALL_A,",
        r"2\s+ALL_B,",
        r"3\s+ASHES_B,",
        r"4\s+ASHES_C,",
        r"5\s+ASHES_D",
        # recoded rows
        r"Geto\s+10100",
        r"Hebrew\s+10011",
        r"Mehri\s+10100",
        r"Soqotri\s+01010",
        r"Walani\s+10000",
    ]
    for pattern in expected_patterns:
        assert re.search(pattern, nexus)