예제 #1
0
def new_nexus_without_sites(nexus_obj, sites_to_remove):
    """
    Returns a new NexusReader instance with the sites in
    `sites_to_remove` removed.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param sites_to_remove: A list of site numbers
    :type sites_to_remove: List

    :return: A NexusWriter instance
    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    # make new nexus
    nexout = NexusWriter()
    nexout.add_comment(
        "Removed %d sites: %s" %
        (len(sites_to_remove), ",".join(["%s" % s for s in sites_to_remove]))
    )
    new_sitepos = 0
    for sitepos in range(nexus_obj.data.nchar):
        if sitepos in sites_to_remove:
            continue  # skip!
        for taxon, data in nexus_obj.data:
            nexout.add(taxon, new_sitepos, data[sitepos])
        new_sitepos += 1
    return nexout
예제 #2
0
 def test_regression_format_string_has_quoted_symbols(self):
     """Regression: Symbols in the format string should be quoted"""
     nex = NexusWriter()
     for char, b in data.items():
         for taxon, value in b.items():
             nex.add(taxon, char, value)
     out = nex.make_nexus()
     assert 'SYMBOLS="123456"' in out
예제 #3
0
 def test_regression_format_string_has_datatype_first(self):
     """
     Regression: Format string should contain 'datatype' as the first
     element
     """
     # SplitsTree complains otherwise.
     nex = NexusWriter()
     for char, b in data.items():
         for taxon, value in b.items():
             nex.add(taxon, char, value)
     out = nex.make_nexus()
     assert "FORMAT DATATYPE=STANDARD" in out
예제 #4
0
def binarise(nexus_obj, one_nexus_per_block=False, keep_zero=False):
    """
    Returns a binary variant of the given `nexus_obj`.
    If `one_nexus_per_block` then we return a list of NexusWriter instances.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param one_nexus_per_block: Whether to return a single NexusWriter, or a
                                list of NexusWriter's (one per character)
    :type one_nexus_per_block: Boolean

    :param keep_zero: A boolean flag denoting whether to
        treat '0' as a missing state or not. The default
        (False) is to ignore '0' as a trait absence.

        Setting this to True will treat '0' as a unique
        state.
    :type keep_zero: Boolean

    :return: A NexusWriter instance or a list of NexusWriter instances.
    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])
    nexuslist = []
    n = NexusWriter()

    for i in sorted(nexus_obj.data.charlabels):
        label = nexus_obj.data.charlabels[i]  # character label
        char = nexus_obj.data.characters[label]  # character dict
        recoding = _recode_to_binary(char, keep_zero)  # recode
        
        new_char_length = len(recoding[list(recoding.keys())[0]])
        
        # loop over recoded data
        for j in range(new_char_length):
            for taxon, state in recoding.items():
                # make new label
                new_label = "%s_%d" % (str(label), j)
                # add to nexus
                n.add(taxon, new_label, state[j])
            
        if one_nexus_per_block:
            nexuslist.append(n)
            n = NexusWriter()

    if one_nexus_per_block:
        return nexuslist
    else:
        return n
예제 #5
0
def test_mixed_type_characters():
    n = NexusWriter()
    n.add('taxon1', 'Character1', 'A')
    n.add('taxon2', 'Character1', 'C')
    n.add('taxon3', 'Character1', 'A')
    with pytest.raises(AssertionError):
        n.add('taxon1', 2, 1, check=True)
예제 #6
0
def multistatise(nexus_obj, charlabel=None):
    """
    Returns a multistate variant of the given `nexus_obj`.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :return: A NexusReader instance
    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    charlabel = charlabel or getattr(nexus_obj, 'short_filename', 1) or 1

    states = {}
    for taxon in nexus_obj.data.matrix:
        states[taxon] = []
        sequence = nexus_obj.data.matrix[taxon]
        for site_idx, value in enumerate(sequence):
            if site_idx > 26:
                raise ValueError(
                    "Too many characters to handle! - run out of A-Z")
            assert value == str(value), "%r is not a string" % value
            if value == '1':
                states[taxon].append(chr(65 + site_idx))

    nexout = NexusWriter()
    for taxon in states:
        if not states[taxon]:
            nexout.add(taxon, charlabel, '?')
        else:
            for s in states[taxon]:
                nexout.add(taxon, charlabel, s)
    return nexout._convert_to_reader()
예제 #7
0
def writer():
    data = {
        'char1': {
            'French': 1,
            'English': 2,
            'Latin': 3
        },
        'char2': {
            'French': 4,
            'English': 5,
            'Latin': 6
        },
    }
    res = NexusWriter()
    for char in data:
        for taxon, value in data[char].items():
            res.add(taxon, char, value)
    return res
예제 #8
0
def shufflenexus(nexus_obj, resample=False):
    """
    Shuffles the characters between each taxon to create a new nexus

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param resample: The number of characters to resample. If set to False,
        then the number of characters will equal the number of characters
        in the original data file.
    :type resample: Integer

    :return: A shuffled NexusReader instance
    :raises AssertionError: if nexus_obj is not a nexus
    :raises ValueError: if resample is not False or a positive Integer
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    if resample is False:
        resample = nexus_obj.data.nchar

    try:
        resample = int(resample)
    except ValueError:
        raise ValueError('resample must be a positive integer or False!')

    if resample < 1:
        raise ValueError('resample must be a positive integer or False!')

    newnexus = NexusWriter()
    newnexus.add_comment(
        "Randomised Nexus generated from %s" % nexus_obj.filename
    )

    for i in range(resample):
        # pick existing character
        character = randrange(0, nexus_obj.data.nchar)
        chars = nexus_obj.data.characters[character]
        site_values = [chars[taxon] for taxon in nexus_obj.data.taxa]
        shuffle(site_values)
        for taxon in nexus_obj.data.taxa:
            newnexus.add(taxon, i, site_values.pop(0))
    return newnexus
예제 #9
0
def new_nexus_without_sites(nexus_obj, sites_to_remove):
    """
    Returns a new NexusReader instance with the sites in
    `sites_to_remove` removed.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param sites_to_remove: A list of site numbers
    :type sites_to_remove: List

    :return: A NexusWriter instance
    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    # make new nexus
    nexout = NexusWriter()
    nexout.add_comment(
        "Removed %d sites: %s" %
        (len(sites_to_remove), ",".join(["%s" % s for s in sites_to_remove])))
    new_sitepos = 0
    for sitepos in range(nexus_obj.data.nchar):
        if sitepos in sites_to_remove:
            continue  # skip!
        for taxon, data in nexus_obj.data:
            nexout.add(taxon, new_sitepos, data[sitepos])
        new_sitepos += 1
    return nexout
예제 #10
0
def combine_nexuses(nexuslist):
    """
    Combines a list of NexusReader instances into a single nexus

    :param nexuslist: A list of NexusReader instances
    :type nexuslist: List

    :return: A NexusWriter instance

    :raises TypeError: if nexuslist is not a list of NexusReader instances
    :raises IOError: if unable to read an file in nexuslist
    :raises NexusFormatException: if a nexus file does not have a `data` block
    """
    out = NexusWriter()
    charpos = 0
    for nex_id, nex in enumerate(nexuslist, 1):
        check_for_valid_NexusReader(nex, required_blocks=['data'])

        if hasattr(nex, 'short_filename'):
            nexus_label = os.path.splitext(nex.short_filename)[0]
        elif hasattr(nex, 'label'):
            nexus_label = nex.label
        else:
            nexus_label = str(nex_id)
        
        out.add_comment(
            "%d - %d: %s" %
            (charpos, charpos + nex.data.nchar - 1, nexus_label)
        )
        
        for site_idx, site in enumerate(sorted(nex.data.characters), 0):
            data = nex.data.characters.get(site)
            charpos += 1
            # work out character label
            charlabel = nex.data.charlabels.get(site_idx, site_idx + 1)
            label = '%s.%s' % (nexus_label, charlabel)

            for taxon, value in data.items():
                out.add(taxon, label, value)
    return out
예제 #11
0
def binarise(nexus_obj, keep_zero=False):
    """
    Returns a binary variant of the given `nexus_obj`.
    If `one_nexus_per_block` then we return a list of NexusWriter instances.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param keep_zero: A boolean flag denoting whether to
        treat '0' as a missing state or not. The default
        (False) is to ignore '0' as a trait absence.

        Setting this to True will treat '0' as a unique
        state.
    :type keep_zero: Boolean

    :return: A NexusWriter instance or a list of NexusWriter instances.
    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])
    n = NexusWriter()

    for i in sorted(nexus_obj.data.charlabels):
        label = nexus_obj.data.charlabels[i]  # character label
        char = nexus_obj.data.characters[label]  # character dict
        recoding = _recode_to_binary(char, keep_zero)  # recode
        
        new_char_length = len(recoding[list(recoding.keys())[0]])
        
        # loop over recoded data
        for j in range(new_char_length):
            for taxon, state in recoding.items():
                # make new label
                new_label = "%s_%d" % (str(label), j)
                # add to nexus
                n.add(taxon, new_label, state[j])
    return n
예제 #12
0
def shufflenexus(nexus_obj, resample=False):
    """
    Shuffles the characters between each taxon to create a new nexus

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :param resample: The number of characters to resample. If set to False,
        then the number of characters will equal the number of characters
        in the original data file.
    :type resample: Integer

    :return: A shuffled NexusReader instance
    :raises AssertionError: if nexus_obj is not a nexus
    :raises ValueError: if resample is not False or a positive Integer
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])

    if resample is False:
        resample = nexus_obj.data.nchar

    try:
        resample = int(resample)
    except ValueError:
        raise ValueError('resample must be a positive integer or False!')

    if resample < 1:
        raise ValueError('resample must be a positive integer or False!')

    newnexus = NexusWriter()
    newnexus.add_comment("Randomised Nexus generated from %s" %
                         nexus_obj.filename)

    for i in range(resample):
        # pick existing character
        character = randrange(0, nexus_obj.data.nchar)
        chars = nexus_obj.data.characters[character]
        site_values = [chars[taxon] for taxon in nexus_obj.data.taxa]
        shuffle(site_values)
        for taxon in nexus_obj.data.taxa:
            newnexus.add(taxon, i, site_values.pop(0))
    return newnexus
예제 #13
0
def multistatise(nexus_obj, charlabel=None):
    """
    Returns a multistate variant of the given `nexus_obj`.

    :param nexus_obj: A `NexusReader` instance
    :type nexus_obj: NexusReader

    :return: A NexusReader instance
    :raises AssertionError: if nexus_obj is not a nexus
    :raises NexusFormatException: if nexus_obj does not have a `data` block
    """
    check_for_valid_NexusReader(nexus_obj, required_blocks=['data'])
    
    if not charlabel:
        charlabel = getattr(nexus_obj, 'short_filename', 1)
    
    states = {}
    for taxon in nexus_obj.data.matrix:
        states[taxon] = []
        sequence = nexus_obj.data.matrix[taxon]
        for site_idx, value in enumerate(sequence):
            if site_idx > 26:
                raise ValueError(
                    "Too many characters to handle! - run out of A-Z"
                )
            assert value == str(value), "%r is not a string" % value
            if value == '1':
                states[taxon].append(chr(65 + site_idx))
    
    nexout = NexusWriter()
    for taxon in states:
        if len(states[taxon]) == 0:
            nexout.add(taxon, charlabel, '?')
        else:
            for s in states[taxon]:
                nexout.add(taxon, charlabel, s)
    return nexout._convert_to_reader()
예제 #14
0
def combine_nexuses(nexuslist):
    """
    Combines a list of NexusReader instances into a single nexus

    :param nexuslist: A list of NexusReader instances
    :type nexuslist: List

    :return: A NexusWriter instance

    :raises TypeError: if nexuslist is not a list of NexusReader instances
    :raises IOError: if unable to read an file in nexuslist
    :raises NexusFormatException: if a nexus file does not have a `data` block
    """
    out = NexusWriter()
    charpos = 0
    for nex_id, nex in enumerate(nexuslist, 1):
        check_for_valid_NexusReader(nex, required_blocks=['data'])

        if hasattr(nex, 'short_filename'):
            nexus_label = os.path.splitext(nex.short_filename)[0]
        else:
            nexus_label = str(nex_id)

        out.add_comment("%d - %d: %s" %
                        (charpos, charpos + nex.data.nchar - 1, nexus_label))

        for site_idx, site in enumerate(sorted(nex.data.characters), 0):
            data = nex.data.characters.get(site)
            charpos += 1
            # work out character label
            charlabel = nex.data.charlabels.get(site_idx, site_idx + 1)
            label = '%s.%s' % (nexus_label, charlabel)

            for taxon, value in data.items():
                out.add(taxon, label, value)
    return out
예제 #15
0
class Test_NexusWriter(unittest.TestCase):
    def setUp(self):
        self.nex = NexusWriter()
        for char in data:
            for taxon, value in data[char].items():
                self.nex.add(taxon, char, value)

    def test_char_adding1(self):
        """Test Character Addition 1"""
        assert self.nex.data['char1']['French'] == '1'
        assert self.nex.data['char1']['English'] == '2'
        assert self.nex.data['char1']['Latin'] == '3'

    def test_char_adding2(self):
        """Test Character Addition 2"""
        assert self.nex.data['char2']['French'] == '4'
        assert self.nex.data['char2']['English'] == '5'
        assert self.nex.data['char2']['Latin'] == '6'

    def test_char_adding_integer(self):
        """Test Character Addition as integer"""
        self.nex.add('French', 'char3', 9)
        self.nex.add('English', 'char3', '9')
        assert self.nex.data['char3']['French'] == '9'
        assert self.nex.data['char3']['French'] == '9'

    def test_characters(self):
        assert 'char1' in self.nex.characters
        assert 'char2' in self.nex.characters

    def test_taxa(self):
        assert 'French' in self.nex.taxa
        assert 'English' in self.nex.taxa
        assert 'Latin' in self.nex.taxa

    def test_remove(self):
        self.nex.remove("French", "char2")
        assert 'French' not in self.nex.data['char2']
        assert 'French' in self.nex.taxa

    def test_remove_character(self):
        self.nex.remove_character("char2")
        assert len(self.nex.characters) == 1
        assert 'char2' not in self.nex.data

    def test_remove_taxon(self):
        self.nex.remove_taxon("French")
        assert 'French' not in self.nex.taxa
        for char in self.nex.data:
            assert 'French' not in self.nex.data[char]
        n = self.nex.make_nexus(interleave=False)
        assert re.search("DIMENSIONS NTAX=2 NCHAR=2;", n)
        assert 'French' not in n

    def test_nexus_noninterleave(self):
        """Test Nexus Generation - Non-Interleaved"""
        n = self.nex.make_nexus(interleave=False)
        assert re.search("#NEXUS", n)
        assert re.search("BEGIN DATA;", n)
        assert re.search("DIMENSIONS NTAX=3 NCHAR=2;", n)
        assert re.search("MATRIX", n)
        assert re.search("Latin\s+36", n)
        assert re.search("French\s+14", n)
        assert re.search("English\s+25", n)
        assert re.search("FORMAT.*MISSING\=(.+?)", n).groups()[0] == '?'
        assert re.search("FORMAT.*DATATYPE\=(\w+)\s", n).groups()[0] \
            == 'STANDARD'
        assert re.search('FORMAT.*SYMBOLS\="(\d+)";', n).groups()[0] \
            == '123456'

    def test_nexus_charblock(self):
        """Test Nexus Generation - with characters block"""
        n = self.nex.make_nexus(charblock=True)
        assert re.search("#NEXUS", n)
        assert re.search("BEGIN DATA;", n)
        assert re.search("DIMENSIONS NTAX=3 NCHAR=2;", n)
        assert re.search("CHARSTATELABELS", n)
        assert re.search("1 char1,", n)
        assert re.search("2 char2", n)
        assert re.search("MATRIX", n)
        assert re.search("Latin\s+36", n)
        assert re.search("French\s+14", n)
        assert re.search("English\s+25", n)
        assert re.search("FORMAT.*MISSING\=(.+?)", n).groups()[0] == '?'
        assert re.search("FORMAT.*DATATYPE\=(\w+)\s", n).groups()[0] \
            == 'STANDARD'
        assert re.search('FORMAT.*SYMBOLS\="(\d+)";', n).groups()[0] \
            == '123456'

    def test_nexus_interleave(self):
        """Test Nexus Generation - Interleaved"""
        n = self.nex.make_nexus(interleave=True)
        assert re.search("#NEXUS", n)
        assert re.search("BEGIN DATA;", n)
        assert re.search("DIMENSIONS NTAX=3 NCHAR=2;", n)
        assert re.search("MATRIX", n)
        # char1
        assert re.search("Latin\s+3", n)
        assert re.search("French\s+1", n)
        assert re.search("English\s+2", n)
        # char2
        assert re.search("Latin\s+6", n)
        assert re.search("French\s+4", n)
        assert re.search("English\s+5", n)

        assert re.search("FORMAT.*MISSING\=(.+?)", n).groups()[0] == '?'
        assert re.search("FORMAT.*DATATYPE\=(\w+)\s", n).groups()[0] == \
            'STANDARD'
        assert re.search("FORMAT.*(INTERLEAVE)", n).groups()[0] == \
            'INTERLEAVE'
        assert re.search('FORMAT.*SYMBOLS\="(\d+)";', n).groups()[0] == \
            '123456'

    def test_polymorphic_characters(self):
        self.nex.add("French", "char1", 2)
        self.assertEqual(self.nex.data['char1']['French'], "12")
        n = self.nex.make_nexus(charblock=True)
        assert re.search("DIMENSIONS NTAX=3 NCHAR=2;", n)  # no change
        assert re.search("French\s+\(12\)4", n)

    def test_write_to_file(self):
        tmp = NamedTemporaryFile(delete=False, suffix=".nex")
        tmp.close()
        self.nex.write_to_file(tmp.name)
        assert os.path.isfile(tmp.name)

        with open(tmp.name, 'r') as handle:
            n = handle.read()

        assert re.search("#NEXUS", n)
        assert re.search("BEGIN DATA;", n)
        assert re.search("DIMENSIONS NTAX=3 NCHAR=2;", n)
        assert re.search("MATRIX", n)
        assert re.search("Latin\s+36", n)
        assert re.search("French\s+14", n)
        assert re.search("English\s+25", n)
        assert re.search("FORMAT.*MISSING\=(.+?)", n).groups()[0] == '?'
        assert re.search("FORMAT.*DATATYPE\=(\w+)\s", n).groups()[0] \
            == 'STANDARD'
        assert re.search('FORMAT.*SYMBOLS\="(\d+)";', n).groups()[0] \
            == '123456'

        os.unlink(tmp.name)  # cleanup

    def test_write_as_table(self):
        content = self.nex.write_as_table()
        assert re.search("Latin\s+36", content)
        assert re.search("French\s+14", content)
        assert re.search("English\s+25", content)
        assert len(content.split("\n")) == 3

    def test_write_as_table_with_polymorphoc(self):
        self.nex.add('French', 'char1', '2')
        content = self.nex.write_as_table()
        assert re.search("Latin\s+36", content)
        assert re.search("French\s+\(12\)4", content)
        assert re.search("English\s+25", content)
        assert len(content.split("\n")) == 3
예제 #16
0
 def setUp(self):
     self.nex = NexusWriter()
     for char in data:
         for taxon, value in data[char].items():
             self.nex.add(taxon, char, value)
예제 #17
0
def test_invalid():
    w = NexusWriter()
    with pytest.raises(ValueError):
        w.write()
예제 #18
0
def test_write_with_no_data_but_trees():
    nex = NexusWriter()
    nex.trees.append('tree tree1 = (French,(English,Latin));')
    content = nex.write()
    assert re.search(r"BEGIN TREES", content)
    assert re.search(r"tree tree1 = \(French,\(English,Latin\)\);", content)