def new_nexus_without_sites(nexus_obj, sites_to_remove):
    """
    Returns a new NexusWriter holding every site of `nexus_obj` except the
    ones listed in `sites_to_remove`.

    Sites are addressed by their 0-based position (`range(nchar)`); the sites
    that are kept are renumbered consecutively from 0 in the output.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param sites_to_remove: A list of site numbers
    :type sites_to_remove: List

    :return: A NexusWriter instance
    :raises AssertionError: if nexus_obj is not a nexus (raised by
        `check_for_valid_NexusReader`, which is defined elsewhere)
    :raises NexusFormatException: if nexus_obj does not have a `data` block
        (raised by `check_for_valid_NexusReader`, which is defined elsewhere)
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    # make new nexus
    nexout = NexusWriter()
    # The comment keeps the caller's original ordering of the site numbers.
    nexout.add_comment(
        "Removed %d sites: %s" %
        (len(sites_to_remove), ",".join(["%s" % s for s in sites_to_remove]))
    )

    # Build the lookup once: set membership is O(1), whereas testing against
    # the list inside the loop rescans it for every site.
    skipped = frozenset(sites_to_remove)

    new_sitepos = 0
    for sitepos in range(nexus_obj.data.nchar):
        if sitepos in skipped:
            continue  # skip!
        for taxon, data in nexus_obj.data:
            nexout.add(taxon, new_sitepos, data[sitepos])
        new_sitepos += 1
    return nexout
def test_regression_format_string_has_quoted_symbols(self):
    """Regression: Symbols in the format string should be quoted"""
    writer = NexusWriter()
    # Load every (taxon, character, value) triple from the shared `data`.
    for label, states in data.items():
        for taxon in states:
            writer.add(taxon, label, states[taxon])
    nexus_text = writer.make_nexus()
    assert 'SYMBOLS="123456"' in nexus_text
def test_regression_format_string_has_datatype_first(self):
    """
    Regression: Format string should contain 'datatype' as the first
    element
    """
    # SplitsTree complains otherwise.
    writer = NexusWriter()
    for label, states in data.items():
        for taxon in states:
            writer.add(taxon, label, states[taxon])
    nexus_text = writer.make_nexus()
    assert "FORMAT DATATYPE=STANDARD" in nexus_text
def binarise(nexus_obj, one_nexus_per_block=False, keep_zero=False):
    """
    Recodes every labelled character of `nexus_obj` into binary columns.

    Each character is passed through `_recode_to_binary`; column `j` of the
    recoding is written out under the label "<charlabel>_<j>".

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param one_nexus_per_block: If true, each original character gets its
        own NexusWriter and a list of them is returned; otherwise all
        recoded columns go into a single NexusWriter.
    :type one_nexus_per_block: Boolean

    :param keep_zero: Passed through to `_recode_to_binary`. Per the
        original documentation: False (default) ignores '0' as a trait
        absence, True treats '0' as a unique state.
    :type keep_zero: Boolean

    :return: A NexusWriter instance or a list of NexusWriter instances.
    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    per_character = []
    writer = NexusWriter()

    for index in sorted(nexus_obj.data.charlabels):
        label = nexus_obj.data.charlabels[index]
        recoded = _recode_to_binary(
            nexus_obj.data.characters[label], keep_zero
        )

        # Width of the recoding is read off the first taxon's entry.
        first_taxon = list(recoded.keys())[0]
        width = len(recoded[first_taxon])

        for column in range(width):
            column_label = "%s_%d" % (str(label), column)
            for taxon, states in recoded.items():
                writer.add(taxon, column_label, states[column])

        if one_nexus_per_block:
            # Close off this character's writer and start a fresh one.
            per_character.append(writer)
            writer = NexusWriter()

    return per_character if one_nexus_per_block else writer
def test_mixed_type_characters():
    """
    After adding string-labelled characters, adding an integer-labelled one
    with `check=True` is expected to raise AssertionError.
    """
    writer = NexusWriter()
    for taxon, state in (('taxon1', 'A'), ('taxon2', 'C'), ('taxon3', 'A')):
        writer.add(taxon, 'Character1', state)

    with pytest.raises(AssertionError):
        writer.add('taxon1', 2, 1, check=True)
def multistatise(nexus_obj, charlabel=None):
    """
    Returns a multistate variant of the given `nexus_obj`.

    Every site position is mapped to a letter (position 0 -> 'A', 1 -> 'B',
    ... 25 -> 'Z'). For each taxon, the letters of all sites whose value is
    '1' are added as states of a single character named `charlabel`; a taxon
    with no '1' sites gets the state '?'.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param charlabel: Label of the single output character. When falsy, the
        `short_filename` attribute of `nexus_obj` is used, falling back to 1.

    :return: The result of `NexusWriter._convert_to_reader()` (a NexusReader
        according to the original documentation)
    :raises ValueError: if a sequence has more than 26 sites (A-Z exhausted)
    :raises AssertionError: if a value in the matrix is not a string
    """
    charlabel = charlabel or getattr(nexus_obj, 'short_filename', 1) or 1

    # Only 'A'..'Z' are available, i.e. site indexes 0..25.
    max_sites = 26

    states = {}
    for taxon in nexus_obj.data.matrix:
        states[taxon] = []
        sequence = nexus_obj.data.matrix[taxon]
        for site_idx, value in enumerate(sequence):
            # Index 26 would map to chr(91) == '[', so it must be rejected
            # too (the previous `> 26` test let it through).
            if site_idx >= max_sites:
                raise ValueError(
                    "Too many characters to handle! - run out of A-Z")
            assert value == str(value), "%r is not a string" % value
            if value == '1':
                states[taxon].append(chr(65 + site_idx))

    nexout = NexusWriter()
    for taxon in states:
        if not states[taxon]:
            # No present traits for this taxon: record it as missing.
            nexout.add(taxon, charlabel, '?')
        else:
            for s in states[taxon]:
                nexout.add(taxon, charlabel, s)
    return nexout._convert_to_reader()
def writer():
    """Build a NexusWriter holding two characters scored for three taxa."""
    fixture = {
        'char1': {'French': 1, 'English': 2, 'Latin': 3},
        'char2': {'French': 4, 'English': 5, 'Latin': 6},
    }
    populated = NexusWriter()
    for label, states in fixture.items():
        for taxon in states:
            populated.add(taxon, label, states[taxon])
    return populated
def shufflenexus(nexus_obj, resample=False):
    """
    Shuffles the characters between each taxon to create a new nexus

    For each output character a random existing character is picked (with
    replacement), its values are shuffled across the taxa, and the result is
    stored under the output character's index.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param resample: The number of characters to resample. If set to False,
        then the number of characters will equal the number of characters
        in the original data file.
    :type resample: Integer

    :return: A NexusWriter instance containing the shuffled data
    :raises AssertionError: if nexus_obj is not a nexus
    :raises ValueError: if resample is not False or a positive Integer
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    if resample is False:
        resample = nexus_obj.data.nchar

    try:
        resample = int(resample)
    except (TypeError, ValueError):
        # int() raises TypeError for e.g. None or a list; report those with
        # the same documented ValueError as unparsable strings.
        raise ValueError('resample must be a positive integer or False!')

    if resample < 1:
        raise ValueError('resample must be a positive integer or False!')

    newnexus = NexusWriter()
    newnexus.add_comment(
        "Randomised Nexus generated from %s" % nexus_obj.filename
    )

    for i in range(resample):
        # pick existing character
        character = randrange(0, nexus_obj.data.nchar)
        chars = nexus_obj.data.characters[character]
        site_values = [chars[taxon] for taxon in nexus_obj.data.taxa]
        shuffle(site_values)
        # Hand the shuffled values out in taxon order; pairing with zip
        # avoids the repeated O(n) `pop(0)` of the earlier implementation.
        for taxon, value in zip(nexus_obj.data.taxa, site_values):
            newnexus.add(taxon, i, value)
    return newnexus
def new_nexus_without_sites(nexus_obj, sites_to_remove):
    """
    Returns a new NexusWriter holding every site of `nexus_obj` except the
    ones listed in `sites_to_remove`.

    Sites are addressed by their 0-based position (`range(nchar)`); the sites
    that are kept are renumbered consecutively from 0 in the output.

    NOTE(review): this function performs no validation of `nexus_obj`
    itself. The previous docstring listed AssertionError and
    NexusFormatException, but nothing in this body raises them -- confirm
    whether a validation call is intended here.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param sites_to_remove: A list of site numbers
    :type sites_to_remove: List

    :return: A NexusWriter instance
    """
    # make new nexus
    nexout = NexusWriter()
    # The comment keeps the caller's original ordering of the site numbers.
    nexout.add_comment(
        "Removed %d sites: %s" %
        (len(sites_to_remove), ",".join(["%s" % s for s in sites_to_remove]))
    )

    # Build the lookup once: set membership is O(1), whereas testing against
    # the list inside the loop rescans it for every site.
    skipped = frozenset(sites_to_remove)

    new_sitepos = 0
    for sitepos in range(nexus_obj.data.nchar):
        if sitepos in skipped:
            continue  # skip!
        for taxon, data in nexus_obj.data:
            nexout.add(taxon, new_sitepos, data[sitepos])
        new_sitepos += 1
    return nexout
def combine_nexuses(nexuslist):
    """
    Merges the data blocks of several NexusReader instances into one writer.

    Every character is relabelled "<nexus label>.<character label>", where
    the nexus label is the reader's `short_filename` without extension, else
    its `label` attribute, else its 1-based position in `nexuslist`. A
    comment recording the character range of each input is added as well.

    :param nexuslist: A list of NexusReader instances
    :type nexuslist: List

    :return: A NexusWriter instance
    :raises NexusFormatException: if a nexus file does not have a `data`
        block (raised by `check_for_valid_NexusReader`, defined elsewhere)
    """
    combined = NexusWriter()
    offset = 0  # number of characters written by the readers seen so far

    for position, reader in enumerate(nexuslist, 1):
        check_for_valid_NexusReader(reader, required_blocks=['data'])

        if hasattr(reader, 'short_filename'):
            prefix = os.path.splitext(reader.short_filename)[0]
        elif hasattr(reader, 'label'):
            prefix = reader.label
        else:
            prefix = str(position)

        last = offset + reader.data.nchar - 1
        combined.add_comment("%d - %d: %s" % (offset, last, prefix))

        sites = sorted(reader.data.characters)
        for index, site in enumerate(sites):
            states = reader.data.characters.get(site)
            # work out character label: explicit label if present, else the
            # 1-based site number
            name = reader.data.charlabels.get(index, index + 1)
            full_label = '%s.%s' % (prefix, name)
            for taxon, value in states.items():
                combined.add(taxon, full_label, value)
        offset += len(sites)

    return combined
def binarise(nexus_obj, keep_zero=False):
    """
    Recodes every labelled character of `nexus_obj` into binary columns.

    Each character is passed through `_recode_to_binary`; column `j` of the
    recoding is written out under the label "<charlabel>_<j>".

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param keep_zero: Passed through to `_recode_to_binary`. Per the
        original documentation: False (default) ignores '0' as a trait
        absence, True treats '0' as a unique state.
    :type keep_zero: Boolean

    :return: A NexusWriter instance.
    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    writer = NexusWriter()
    for index in sorted(nexus_obj.data.charlabels):
        label = nexus_obj.data.charlabels[index]
        recoded = _recode_to_binary(
            nexus_obj.data.characters[label], keep_zero
        )

        # Width of the recoding is read off the first taxon's entry.
        first_taxon = list(recoded.keys())[0]
        width = len(recoded[first_taxon])

        for column in range(width):
            column_label = "%s_%d" % (str(label), column)
            for taxon, states in recoded.items():
                writer.add(taxon, column_label, states[column])
    return writer
def shufflenexus(nexus_obj, resample=False):
    """
    Shuffles the characters between each taxon to create a new nexus

    For each output character a random existing character is picked (with
    replacement), its values are shuffled across the taxa, and the result is
    stored under the output character's index.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param resample: The number of characters to resample. If set to False,
        then the number of characters will equal the number of characters
        in the original data file.
    :type resample: Integer

    :return: A NexusWriter instance containing the shuffled data
    :raises AssertionError: if nexus_obj is not a nexus
    :raises ValueError: if resample is not False or a positive Integer
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    if resample is False:
        resample = nexus_obj.data.nchar

    try:
        resample = int(resample)
    except (TypeError, ValueError):
        # int() raises TypeError for e.g. None or a list; report those with
        # the same documented ValueError as unparsable strings.
        raise ValueError('resample must be a positive integer or False!')

    if resample < 1:
        raise ValueError('resample must be a positive integer or False!')

    newnexus = NexusWriter()
    newnexus.add_comment(
        "Randomised Nexus generated from %s" % nexus_obj.filename
    )

    for i in range(resample):
        # pick existing character
        character = randrange(0, nexus_obj.data.nchar)
        chars = nexus_obj.data.characters[character]
        site_values = [chars[taxon] for taxon in nexus_obj.data.taxa]
        shuffle(site_values)
        # Hand the shuffled values out in taxon order; pairing with zip
        # avoids the repeated O(n) `pop(0)` of the earlier implementation.
        for taxon, value in zip(nexus_obj.data.taxa, site_values):
            newnexus.add(taxon, i, value)
    return newnexus
def multistatise(nexus_obj, charlabel=None):
    """
    Returns a multistate variant of the given `nexus_obj`.

    Every site position is mapped to a letter (position 0 -> 'A', 1 -> 'B',
    ... 25 -> 'Z'). For each taxon, the letters of all sites whose value is
    '1' are added as states of a single character named `charlabel`; a taxon
    with no '1' sites gets the state '?'.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param charlabel: Label of the single output character. When falsy, the
        `short_filename` attribute of `nexus_obj` is used, falling back to 1.

    :return: The result of `NexusWriter._convert_to_reader()` (a NexusReader
        according to the original documentation)
    :raises AssertionError: if nexus_obj is not a nexus, or if a value in
        the matrix is not a string
    :raises ValueError: if a sequence has more than 26 sites (A-Z exhausted)
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    if not charlabel:
        charlabel = getattr(nexus_obj, 'short_filename', 1)

    # Only 'A'..'Z' are available, i.e. site indexes 0..25.
    max_sites = 26

    states = {}
    for taxon in nexus_obj.data.matrix:
        states[taxon] = []
        sequence = nexus_obj.data.matrix[taxon]
        for site_idx, value in enumerate(sequence):
            # Index 26 would map to chr(91) == '[', so it must be rejected
            # too (the previous `> 26` test let it through).
            if site_idx >= max_sites:
                raise ValueError(
                    "Too many characters to handle! - run out of A-Z"
                )
            assert value == str(value), "%r is not a string" % value
            if value == '1':
                states[taxon].append(chr(65 + site_idx))

    nexout = NexusWriter()
    for taxon in states:
        if not states[taxon]:
            # No present traits for this taxon: record it as missing.
            nexout.add(taxon, charlabel, '?')
        else:
            for s in states[taxon]:
                nexout.add(taxon, charlabel, s)
    return nexout._convert_to_reader()
def combine_nexuses(nexuslist):
    """
    Merges the data blocks of several NexusReader instances into one writer.

    Every character is relabelled "<nexus label>.<character label>", where
    the nexus label is the reader's `short_filename` without extension, or
    its 1-based position in `nexuslist` when it has no such attribute. A
    comment recording the character range of each input is added as well.

    :param nexuslist: A list of NexusReader instances
    :type nexuslist: List

    :return: A NexusWriter instance
    :raises NexusFormatException: if a nexus file does not have a `data`
        block (raised by `check_for_valid_NexusReader`, defined elsewhere)
    """
    combined = NexusWriter()
    offset = 0  # number of characters written by the readers seen so far

    for position, reader in enumerate(nexuslist, 1):
        check_for_valid_NexusReader(reader, required_blocks=['data'])

        if hasattr(reader, 'short_filename'):
            prefix = os.path.splitext(reader.short_filename)[0]
        else:
            prefix = str(position)

        last = offset + reader.data.nchar - 1
        combined.add_comment("%d - %d: %s" % (offset, last, prefix))

        sites = sorted(reader.data.characters)
        for index, site in enumerate(sites):
            states = reader.data.characters.get(site)
            # work out character label: explicit label if present, else the
            # 1-based site number
            name = reader.data.charlabels.get(index, index + 1)
            full_label = '%s.%s' % (prefix, name)
            for taxon, value in states.items():
                combined.add(taxon, full_label, value)
        offset += len(sites)

    return combined
class Test_NexusWriter(unittest.TestCase):
    """
    Tests for adding, removing and serialising data with NexusWriter.

    Every test starts from a writer populated from the module-level `data`
    mapping (defined outside this class).
    """

    def setUp(self):
        self.nex = NexusWriter()
        for char in data:
            for taxon, value in data[char].items():
                self.nex.add(taxon, char, value)

    def test_char_adding1(self):
        """Test Character Addition 1"""
        assert self.nex.data['char1']['French'] == '1'
        assert self.nex.data['char1']['English'] == '2'
        assert self.nex.data['char1']['Latin'] == '3'

    def test_char_adding2(self):
        """Test Character Addition 2"""
        assert self.nex.data['char2']['French'] == '4'
        assert self.nex.data['char2']['English'] == '5'
        assert self.nex.data['char2']['Latin'] == '6'

    def test_char_adding_integer(self):
        """Test Character Addition as integer"""
        self.nex.add('French', 'char3', 9)
        self.nex.add('English', 'char3', '9')
        # Integer and string input must both be stored as the string '9'.
        # (The second check previously re-tested French by mistake.)
        assert self.nex.data['char3']['French'] == '9'
        assert self.nex.data['char3']['English'] == '9'

    def test_characters(self):
        assert 'char1' in self.nex.characters
        assert 'char2' in self.nex.characters

    def test_taxa(self):
        assert 'French' in self.nex.taxa
        assert 'English' in self.nex.taxa
        assert 'Latin' in self.nex.taxa

    def test_remove(self):
        self.nex.remove("French", "char2")
        assert 'French' not in self.nex.data['char2']
        # French still has a value for char1, so it remains a taxon.
        assert 'French' in self.nex.taxa

    def test_remove_character(self):
        self.nex.remove_character("char2")
        assert len(self.nex.characters) == 1
        assert 'char2' not in self.nex.data

    def test_remove_taxon(self):
        self.nex.remove_taxon("French")
        assert 'French' not in self.nex.taxa
        for char in self.nex.data:
            assert 'French' not in self.nex.data[char]
        n = self.nex.make_nexus(interleave=False)
        assert re.search(r"DIMENSIONS NTAX=2 NCHAR=2;", n)
        assert 'French' not in n

    def test_nexus_noninterleave(self):
        """Test Nexus Generation - Non-Interleaved"""
        n = self.nex.make_nexus(interleave=False)
        assert re.search(r"#NEXUS", n)
        assert re.search(r"BEGIN DATA;", n)
        assert re.search(r"DIMENSIONS NTAX=3 NCHAR=2;", n)
        assert re.search(r"MATRIX", n)
        assert re.search(r"Latin\s+36", n)
        assert re.search(r"French\s+14", n)
        assert re.search(r"English\s+25", n)
        assert re.search(r"FORMAT.*MISSING\=(.+?)", n).groups()[0] == '?'
        assert re.search(r"FORMAT.*DATATYPE\=(\w+)\s", n).groups()[0] \
            == 'STANDARD'
        assert re.search(r'FORMAT.*SYMBOLS\="(\d+)";', n).groups()[0] \
            == '123456'

    def test_nexus_charblock(self):
        """Test Nexus Generation - with characters block"""
        n = self.nex.make_nexus(charblock=True)
        assert re.search(r"#NEXUS", n)
        assert re.search(r"BEGIN DATA;", n)
        assert re.search(r"DIMENSIONS NTAX=3 NCHAR=2;", n)
        assert re.search(r"CHARSTATELABELS", n)
        assert re.search(r"1 char1,", n)
        assert re.search(r"2 char2", n)
        assert re.search(r"MATRIX", n)
        assert re.search(r"Latin\s+36", n)
        assert re.search(r"French\s+14", n)
        assert re.search(r"English\s+25", n)
        assert re.search(r"FORMAT.*MISSING\=(.+?)", n).groups()[0] == '?'
        assert re.search(r"FORMAT.*DATATYPE\=(\w+)\s", n).groups()[0] \
            == 'STANDARD'
        assert re.search(r'FORMAT.*SYMBOLS\="(\d+)";', n).groups()[0] \
            == '123456'

    def test_nexus_interleave(self):
        """Test Nexus Generation - Interleaved"""
        n = self.nex.make_nexus(interleave=True)
        assert re.search(r"#NEXUS", n)
        assert re.search(r"BEGIN DATA;", n)
        assert re.search(r"DIMENSIONS NTAX=3 NCHAR=2;", n)
        assert re.search(r"MATRIX", n)
        # char1
        assert re.search(r"Latin\s+3", n)
        assert re.search(r"French\s+1", n)
        assert re.search(r"English\s+2", n)
        # char2
        assert re.search(r"Latin\s+6", n)
        assert re.search(r"French\s+4", n)
        assert re.search(r"English\s+5", n)
        assert re.search(r"FORMAT.*MISSING\=(.+?)", n).groups()[0] == '?'
        assert re.search(r"FORMAT.*DATATYPE\=(\w+)\s", n).groups()[0] == \
            'STANDARD'
        assert re.search(r"FORMAT.*(INTERLEAVE)", n).groups()[0] == \
            'INTERLEAVE'
        assert re.search(r'FORMAT.*SYMBOLS\="(\d+)";', n).groups()[0] == \
            '123456'

    def test_polymorphic_characters(self):
        # A second state for an existing taxon/character is appended.
        self.nex.add("French", "char1", 2)
        self.assertEqual(self.nex.data['char1']['French'], "12")
        n = self.nex.make_nexus(charblock=True)
        assert re.search(r"DIMENSIONS NTAX=3 NCHAR=2;", n)  # no change
        assert re.search(r"French\s+\(12\)4", n)

    def test_write_to_file(self):
        tmp = NamedTemporaryFile(delete=False, suffix=".nex")
        tmp.close()
        try:
            self.nex.write_to_file(tmp.name)
            assert os.path.isfile(tmp.name)
            with open(tmp.name, 'r') as handle:
                n = handle.read()
            assert re.search(r"#NEXUS", n)
            assert re.search(r"BEGIN DATA;", n)
            assert re.search(r"DIMENSIONS NTAX=3 NCHAR=2;", n)
            assert re.search(r"MATRIX", n)
            assert re.search(r"Latin\s+36", n)
            assert re.search(r"French\s+14", n)
            assert re.search(r"English\s+25", n)
            assert re.search(r"FORMAT.*MISSING\=(.+?)", n).groups()[0] == '?'
            assert re.search(r"FORMAT.*DATATYPE\=(\w+)\s", n).groups()[0] \
                == 'STANDARD'
            assert re.search(r'FORMAT.*SYMBOLS\="(\d+)";', n).groups()[0] \
                == '123456'
        finally:
            # cleanup, even when an assertion above fails
            if os.path.exists(tmp.name):
                os.unlink(tmp.name)

    def test_write_as_table(self):
        content = self.nex.write_as_table()
        assert re.search(r"Latin\s+36", content)
        assert re.search(r"French\s+14", content)
        assert re.search(r"English\s+25", content)
        assert len(content.split("\n")) == 3

    def test_write_as_table_with_polymorphoc(self):
        self.nex.add('French', 'char1', '2')
        content = self.nex.write_as_table()
        assert re.search(r"Latin\s+36", content)
        assert re.search(r"French\s+\(12\)4", content)
        assert re.search(r"English\s+25", content)
        assert len(content.split("\n")) == 3
def setUp(self):
    """Give each test a fresh NexusWriter loaded from the shared `data`."""
    self.nex = writer = NexusWriter()
    for label in data:
        states = data[label]
        for taxon, value in states.items():
            writer.add(taxon, label, value)
def test_invalid():
    """Calling `write()` on a freshly created writer should raise ValueError."""
    empty_writer = NexusWriter()
    with pytest.raises(ValueError):
        empty_writer.write()
def test_write_with_no_data_but_trees():
    """A writer holding only a tree should still write out a trees block."""
    writer = NexusWriter()
    writer.trees.append('tree tree1 = (French,(English,Latin));')
    output = writer.write()

    expected_patterns = (
        r"BEGIN TREES",
        r"tree tree1 = \(French,\(English,Latin\)\);",
    )
    for pattern in expected_patterns:
        assert re.search(pattern, output)