Example #1
0
#!/usr/bin/env python
import sys, logging
from dendropy.utility.messaging import get_logger
_LOG = get_logger('sankoff')
from dendropy import DataSet
_DEBUGGING = True

verbose = False


def get_min_edge_costs(cost_row, costs_for_one_child):
    """Return the smallest value of ``cost_row[i] + costs_for_one_child[i]``.

    The two sequences are read in parallel, once for every index of
    ``cost_row``; ``costs_for_one_child`` must therefore be at least as long
    as ``cost_row``.

    Raises:
        IndexError: for list-like arguments, when ``cost_row`` is empty or
            ``costs_for_one_child`` is shorter than ``cost_row``.
    """
    min_score = cost_row[0] + costs_for_one_child[0]
    # ``range`` instead of the Python-2-only ``xrange`` so that the function
    # works on both Python 2 and Python 3.
    for i in range(1, len(cost_row)):
        min_score = min(min_score, cost_row[i] + costs_for_one_child[i])
    return min_score


def get_min_cost(step_mat_row, child_costs):
    """Add up the minimum edge cost of every child.

    For each entry of ``child_costs`` the minimum edge cost against
    ``step_mat_row`` is obtained from ``get_min_edge_costs``; the sum of
    those minima is returned (0 when ``child_costs`` is empty).
    """
    return sum(
        get_min_edge_costs(step_mat_row, one_child)
        for one_child in child_costs
    )


def sankoff(postorder_node_list, taxa_to_state_set_map, step_matrix):
    max_cost = 0
    num_states = len(step_matrix)
    for row in step_matrix:
        for cell in row:
#!/usr/bin/env python
import sys
import copy
import logging
import itertools
from dendropy.utility.messaging import get_logger
from dendropy.treesplit import encode_splits
from dendropy.treemanip import collapse_edge
from dendropy.treecalc import symmetric_difference
from dendropy import format_split, Edge, TaxonSet, DataSet

_LOG = get_logger('scripts.long_branch_symmdiff')
verbose = False

def long_branch_symmdiff(trees_to_compare, edge_len_threshold, copy_trees=False, rooted=False):
    """Returns matrix of the symmetric_differences between trees after all
    internal edges with lengths < `edge_len_threshold` have been collapsed.

    If `copy_trees` is True then the trees will be copied first (if False, then
        the input trees may have their short edges collapsed on exit).
    """
    if copy_trees:
        # copy.copy() makes a shallow copy of each tree object.
        tree_list = [copy.copy(i) for i in trees_to_compare]
    else:
        # Same tree objects, just collected into a new list.
        tree_list = list(trees_to_compare)

    n_trees = len(tree_list)
    _LOG.debug('%d Trees to compare:\n%s\n' % (n_trees, '\n'.join([str(i) for i in tree_list])))
    if n_trees < 2:
        # Fewer than two trees: nothing to compare, so return one 0 per tree
        # (an empty list when no trees were given).
        return [0 for t in tree_list]
 def _get_logger(self):
     """Return this object's logger, creating and caching it on first use.

     When ``self._logger`` is missing or ``None``, a logger named after the
     instance's class is requested from ``messaging.get_logger`` and stored
     in ``self._logger``.
     """
     if getattr(self, "_logger", None) is None:
         self._logger = messaging.get_logger(self.__class__.__name__)
     return self._logger
#!/usr/bin/env python
import math
import logging
from dendropy.utility.messaging import get_logger
_LOG = get_logger('offspring')

class SingleLocusGenotype(object):
    "object that has an allele 1 & an allele 2"
    def __init__(self, first, second):
        self.first = min(first, second)
        self.second = max(first, second)

    def __str__(self):
        return repr(self.first) + '/' + repr(self.second)

    def calcLnL(self, outcrossing_prob, allele_freq, mom_g):
        prob_offspring_given_selfing = self.calc_prob_offspring_given_selfing(mom_g)
        _LOG.debug(str(self) +  " from " + str(mom_g) +  " P(self) = " + str(prob_offspring_given_selfing))
        prob_offspring_given_outcrossing = self.calc_prob_offspring_given_outcrossing(allele_freq, mom_g)
        _LOG.debug(str(self) +  " from " + str(mom_g) +  " P(outcross) = " + str(prob_offspring_given_outcrossing))
        return math.log(((1.0 - outcrossing_prob) * prob_offspring_given_selfing) + (outcrossing_prob * prob_offspring_given_outcrossing))

    def calc_prob_offspring_given_selfing(self, mom_g):
        if mom_g.first == mom_g.second:
            # mom is homozygous
            if self.first == self.second and (self.first == mom_g.first):
                return 1.0
            return 0.0
        else:
            if (self.first == self.second):
                # mom is het
Example #5
0
"""
Tree simulation and generation.
"""

import sys
import copy
import math

from dendropy.utility import GLOBAL_RNG
from dendropy.mathlib import probability
from dendropy import coalescent
from dendropy import dataobject
from dendropy import treemanip
from dendropy.utility.messaging import get_logger

_LOG = get_logger(__name__)


class TreeSimTotalExtinctionException(Exception):
    """Raised when a branching process ends with every lineage extinct."""

    def __init__(self, *args, **kwargs):
        # Plain pass-through to the base ``Exception`` initializer.
        super(TreeSimTotalExtinctionException, self).__init__(*args, **kwargs)


def star_tree(taxon_set):
    """Build and return a star tree for the given taxa block.

    A ``dataobject.Tree`` is created over ``taxon_set`` and one child is
    added to its seed node for every taxon in ``taxon_set``.
    """
    tree = dataobject.Tree(taxon_set=taxon_set)
    for leaf_taxon in taxon_set:
        tree.seed_node.new_child(taxon=leaf_taxon)
    return tree
Example #6
0
#!/usr/bin/env python
import sys
import copy
import logging
import itertools
from dendropy.utility.messaging import get_logger
from dendropy.treesplit import encode_splits
from dendropy.treemanip import collapse_edge
from dendropy.treecalc import symmetric_difference
from dendropy import format_split, Edge, TaxonSet, DataSet

_LOG = get_logger('scripts.long_branch_symmdiff')
verbose = False


def long_branch_symmdiff(trees_to_compare,
                         edge_len_threshold,
                         copy_trees=False,
                         rooted=False):
    """Returns matrix of the symmetric_differences between trees after all
    internal edges with lengths < `edge_len_threshold` have been collapsed.

    If `copy_trees` is True then the trees will be copied first (if False, then
        the input trees may have their short edges collapsed on exit).
    """
    if copy_trees:
        # copy.copy() makes a shallow copy of each tree object.
        tree_list = [copy.copy(i) for i in trees_to_compare]
    else:
        # Same tree objects, just collected into a new list.
        tree_list = list(trees_to_compare)

    n_trees = len(tree_list)
Example #7
0
#!/usr/bin/env python
import sys
from dendropy.utility.messaging import get_logger
_LOG = get_logger('sankoff')
from dendropy import DataSet
from dendropy.utility.error import DataParseError
_DEBUGGING = True
verbose = False

def get_min_edge_costs(step_mat_row, child_costs):
    """Return the smallest value of ``step_mat_row[i] + child_costs[i]``.

    The two sequences are read in parallel, once for every index of
    ``step_mat_row``; ``child_costs`` must therefore be at least as long as
    ``step_mat_row``.

    Raises:
        IndexError: for list-like arguments, when ``step_mat_row`` is empty
            or ``child_costs`` is shorter than ``step_mat_row``.
    """
    min_score = step_mat_row[0] + child_costs[0]
    # ``range`` instead of the Python-2-only ``xrange`` so that the function
    # works on both Python 2 and Python 3.
    for i in range(1, len(step_mat_row)):
        min_score = min(min_score, step_mat_row[i] + child_costs[i])
    return min_score

def get_min_cost(step_mat_row, child_costs):
    """Add up the minimum edge cost of every child.

    For each entry of ``child_costs`` the minimum edge cost against
    ``step_mat_row`` is obtained from ``get_min_edge_costs``; the sum of
    those minima is returned (0 when ``child_costs`` is empty).
    """
    return sum(
        get_min_edge_costs(step_mat_row, one_child)
        for one_child in child_costs
    )

def sankoff(postorder_node_list, step_matrix, taxa_to_state_set_map):
    max_cost = 0
    num_states = len(step_matrix)
    for row in step_matrix:
        for cell in row:
            if cell > max_cost:
                max_cost = cell
    impossible_cost = 1 + max_cost
Example #8
0
"""
Tree simulation and generation.
"""

import sys
import copy
import math

from dendropy.utility import GLOBAL_RNG
from dendropy.utility import probability
from dendropy import coalescent
from dendropy import dataobject
from dendropy import treemanip
from dendropy.utility.messaging import get_logger

_LOG = get_logger(__name__)


class TreeSimTotalExtinctionException(Exception):
    """Raised when a branching process ends with every lineage extinct."""

    def __init__(self, *args, **kwargs):
        # Plain pass-through to the base ``Exception`` initializer.
        super(TreeSimTotalExtinctionException, self).__init__(*args, **kwargs)


def star_tree(taxon_set):
    """Build and return a star tree for the given taxa block.

    A ``dataobject.Tree`` is created over ``taxon_set`` and one child is
    added to its seed node for every taxon in ``taxon_set``.
    """
    tree = dataobject.Tree(taxon_set=taxon_set)
    for leaf_taxon in taxon_set:
        tree.seed_node.new_child(taxon=leaf_taxon)
    return tree
#!/usr/bin/env python
import sys
import os
import subprocess
from dendropy.utility.messaging import get_logger
from dendropy.treecalc import fitch_down_pass, fitch_up_pass
from dendropy import DataSet
from dendropy.utility.error import DataParseError
from dendropy.utility.textutils import escape_nexus_token
_DEBUGGING = True
_LOG = get_logger('geodispersal')
verbose = False
AREA_NAME_LIST = []
col_width = 17

def warn(msg):
    """Log `msg` at WARNING level on the module-level logger ``_LOG``."""
    # ``Logger.warn`` is a deprecated alias of ``Logger.warning``; call the
    # supported name. NOTE(review): assumes ``_LOG`` is a standard
    # ``logging.Logger`` -- confirm against ``get_logger``.
    _LOG.warning(msg)
LAST_COMMAND = ''
def write_as_nexus(stream, patterns, label):
    global LAST_COMMAND
    stream.write("\n[!%s ]\n" % label)
    p = patterns[0]
    num_chars = len(patterns)
    num_areas = len(p)
    if num_areas < len(AREA_NAME_LIST):
        warn('%d labels were found in the labels file, but only %d areas were read in the input NEXUS files' % (
                    len(AREA_NAME_LIST),
                    num_areas))
    elif num_areas > len(AREA_NAME_LIST):
        warn('Only %d labels were found in the labels file, but %d areas were read in the input NEXUS files' % (
                    len(AREA_NAME_LIST),
##
##  If you use this work or any portion thereof in published work,
##  please cite it as:
##
##     Sukumaran, J. and M. T. Holder. 2010. DendroPy: a Python library
##     for phylogenetic computing. Bioinformatics 26: 1569-1571.
##
##############################################################################

"""
Facultative use of NCL for NEXUS parsing.
"""

import os
from dendropy.utility.messaging import get_logger
_LOG = get_logger("dataio.ncl")

DENDROPY_NCL_AVAILABILITY = False
try:
    from nexusclasslib import nclwrapper
    DENDROPY_NCL_AVAILABILITY = True
except ImportError:
    DENDROPY_NCL_AVAILABILITY = False
else:

    import os
    from threading import Thread, Event
    from dendropy import dataobject
    from dendropy.dataio import nexusreader_py
    from dendropy.dataio import nexustokenizer
    from dendropy.utility import iosys
Example #11
0
Wrapper for interacting with RSPR
"""

import subprocess
import uuid
import tempfile
import socket
import random
import os
import sys

import dendropy
from dendropy.utility.messaging import get_logger
from dendropy.utility import processio
from dendropy.utility import textprocessing
_LOG = get_logger("interop.rspr")

HOSTNAME = socket.gethostname()
PID = os.getpid()


class Rspr(object):
    """
    This class wraps all attributes and input needed to make a call to RSPR.

        https://github.com/cwhidden/rspr

    RSPR:

    Calculate approximate and exact Subtree Prune and Regraft (rSPR)
    distances and the associated maximum agreement forests (MAFs) between pairs
Example #12
0
import subprocess
from cStringIO import StringIO
import uuid
import tempfile
import socket
import random
import os
import sys

from optparse import OptionGroup
from optparse import OptionParser

import dendropy
from dendropy.utility.messaging import get_logger
_LOG = get_logger("interop.seqgen")

HOSTNAME = socket.gethostname()
PID = os.getpid()

def _get_strongly_unique_tempfile(dir=None):
    """Open a named temporary file whose name embeds host, PID and a UUID.

    The file-name prefix is built from the module-level ``HOSTNAME`` and
    ``PID`` plus a freshly generated ``uuid.uuid4()``. ``dir`` is passed
    through to ``tempfile.NamedTemporaryFile``.
    """
    unique_prefix = "dendropy_tempfile-{0}-{1}-{2}".format(
        HOSTNAME, PID, uuid.uuid4())
    return tempfile.NamedTemporaryFile(dir=dir, prefix=unique_prefix)

def _get_tempfile(dir=None):
    """Open and return a ``tempfile.NamedTemporaryFile`` created in ``dir``."""
    temp_file = tempfile.NamedTemporaryFile(dir=dir)
    return temp_file

class SeqGen(object):
    """
    This class wraps all attributes and input needed to make a call to SeqGen.
    """
#!/usr/bin/env python
#!/usr/bin/env python
import sys
import os
import subprocess
from dendropy.utility.messaging import get_logger
from dendropy.treecalc import fitch_down_pass, fitch_up_pass
from dendropy import DataSet
from dendropy.utility.error import DataParseError
from dendropy.utility.textutils import escape_nexus_token
_DEBUGGING = True
_LOG = get_logger('geodispersal')
verbose = False
AREA_NAME_LIST = []
col_width = 17


def warn(msg):
    """Log `msg` at WARNING level on the module-level logger ``_LOG``."""
    # ``Logger.warn`` is a deprecated alias of ``Logger.warning``; call the
    # supported name. NOTE(review): assumes ``_LOG`` is a standard
    # ``logging.Logger`` -- confirm against ``get_logger``.
    _LOG.warning(msg)


LAST_COMMAND = ''


def write_as_nexus(stream, patterns, label):
    global LAST_COMMAND
    stream.write("\n[!%s ]\n" % label)
    p = patterns[0]
    num_chars = len(patterns)
    num_areas = len(p)
    if num_areas < len(AREA_NAME_LIST):
Example #14
0
except ImportError:
    from io import StringIO # Python 3
import uuid
import tempfile
import socket
import random
import os
import sys

from optparse import OptionGroup
from optparse import OptionParser

import dendropy
from dendropy.utility.messaging import get_logger
from dendropy.utility import processio
_LOG = get_logger("interop.seqgen")

HOSTNAME = socket.gethostname()
PID = os.getpid()

def _get_strongly_unique_tempfile(dir=None):
    """Open a named temporary file whose name embeds host, PID and a UUID.

    The file-name prefix is built from the module-level ``HOSTNAME`` and
    ``PID`` plus a freshly generated ``uuid.uuid4()``. ``dir`` is passed
    through to ``tempfile.NamedTemporaryFile``.
    """
    unique_prefix = "dendropy_tempfile-{0}-{1}-{2}".format(
        HOSTNAME, PID, uuid.uuid4())
    return tempfile.NamedTemporaryFile(dir=dir, prefix=unique_prefix)

def _get_tempfile(dir=None):
    """Open and return a ``tempfile.NamedTemporaryFile`` created in ``dir``."""
    temp_file = tempfile.NamedTemporaryFile(dir=dir)
    return temp_file

class SeqGen(object):
    """
    This class wraps all attributes and input needed to make a call to SeqGen.
    """
Example #15
0
def main():
    """Command-line entry point: select DendroPy test modules and run them.

    Positional arguments name individual tests/modules or one of the
    predefined ``@group`` names. With no selection at all, every test module
    found by ``dendropytest.discover_test_module_paths()`` is run. The
    process exits early (status 0) for ``--help-testgroups`` and
    ``--list-only``, and exits with an error message for an unknown group
    name.
    """
    group_names = (
        ("@all"       , ".*"),
        ("@datamodel" , ".*_datamodel_.*"),
        ("@dataio"    , ".*_dataio_.*"),
        ("@newick"    , ".*_newick_.*"),
        ("@tree"      , ".*_tree_.*"),
        )
    test_group_patterns = collections.OrderedDict(group_names)
    test_group_names = list(test_group_patterns)
    parser = argparse.ArgumentParser()
    # Adjacent string literals are joined with no separator, so every piece
    # except the last must end with an explicit space.
    parser.add_argument("test_names",
            metavar="TEST",
            nargs="*",
            help="Name of test(s) to run. These can be (dot-)qualified module, test "
            "case, or test name (e.g., 'test_module', 'test_module.TestCase1', "
            "'test_module.TestCase1.test1') or special pre-defined groups of "
            "tests (e.g., '@datamodel', '@dataio'). Type '--help-testgroups' for "
            "a list of available groups.")
    parser.add_argument("--help-testgroups",
            action="store_true",
            default=False,
            help="Show list of available test groups and exit.")
    parser.add_argument("--list-only",
            action="store_true",
            default=False,
            help="Do not actually run tests: just print list of test module names and exit.")
    parser.add_argument("-v", "--verbosity",
            default=3,
            type=int,
            help="Messaging noisiness (default: %(default)s)")
    parser.add_argument("--logging-level",
            default=os.environ.get(metavar.LOGGING_LEVEL_ENVAR, "NOTSET"),
            choices=["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
            help="Test logging level (default: '%(default)s')")
    parser.add_argument("-f", "--fail-fast",
            action="store_true",
            default=False,
            help="Stop the test run on the first error or failure.")
    parser.add_argument("-I", "--fail-incomplete",
            action="store_true",
            default=False,
            help="Fail incomplete or partially-complete test stubs.")
    args = parser.parse_args()

    if args.help_testgroups:
        out = sys.stdout
        out.write("Available special test groups:\n")
        for name in test_group_names:
            out.write("  - {}\n".format(name))
        sys.exit(0)

    # Set logging level:
    os.environ[metavar.LOGGING_LEVEL_ENVAR] = args.logging_level
    # The returned logger is not used here; the call is kept (after the
    # environment variable is set) in case it configures logging.
    messaging.get_logger("dendropy")

    # Set test specifications
    if args.fail_incomplete:
        os.environ[metavar.FAIL_INCOMPLETE_TESTS_ENVAR] = "1"

    # get test modules
    test_names = []
    filter_patterns = []
    for name in args.test_names:
        if name is None:
            continue
        if name.startswith("@"):
            # Predefined group: translate to its module-name pattern.
            try:
                filter_patterns.append(test_group_patterns[name])
            except KeyError:
                sys.exit("Unrecognized test group name '{}'. Accepted names: {}".format(name, test_group_names))
        else:
            # Accept file paths as well as dotted names, and qualify the
            # result so that it lives under ``dendropy.test``.
            name = name.replace(os.sep, ".")
            if name.endswith(".py"):
                name = name[:-3]
            if not name.startswith("dendropy.test."):
                if name.startswith("test."):
                    name = "dendropy." + name
                else:
                    name = "dendropy.test." + name
            test_names.append(name)

    if not test_names and not filter_patterns:
        test_names = dendropytest.discover_test_module_paths() # get all
    if filter_patterns:
        test_names.extend(dendropytest.discover_test_module_paths(filter_patterns))
    # De-duplicate and run in a stable order.
    test_names = sorted(set(test_names))

    # 0: nothing
    # 1: errors and mishaps only + 0
    # 2: warnings + 1
    # 3: general messages + 2
    if args.verbosity >= 3 or args.list_only:
        if args.list_only:
            out = sys.stdout
        else:
            out = sys.stderr
        out.write("DendroPy tests to be run:\n")
        for mp in test_names:
            out.write(" + {}\n".format(mp))

    if args.list_only:
        sys.exit(0)

    tests = unittest.defaultTestLoader.loadTestsFromNames(test_names)
    test_suite = unittest.TestSuite(tests)
    test_runner = unittest.TextTestRunner(verbosity=args.verbosity, failfast=args.fail_fast)
    test_runner.run(test_suite)
Example #16
0
#!/usr/bin/env python
import math
import logging
from dendropy.utility.messaging import get_logger
_LOG = get_logger('offspring')


class SingleLocusGenotype(object):
    """Genotype at a single locus: two alleles kept as (smaller, larger)."""

    def __init__(self, first, second):
        # Store the alleles in sorted order so that the argument order does
        # not matter.
        self.first, self.second = min(first, second), max(first, second)

    def __str__(self):
        return '/'.join((repr(self.first), repr(self.second)))

    def calcLnL(self, outcrossing_prob, allele_freq, mom_g):
        """Return the log-probability of this offspring genotype.

        The probability is the mixture
        ``(1 - outcrossing_prob) * P(offspring | selfing) +
        outcrossing_prob * P(offspring | outcrossing)``, where the two
        conditional probabilities come from
        ``calc_prob_offspring_given_selfing(mom_g)`` and
        ``calc_prob_offspring_given_outcrossing(allele_freq, mom_g)``.
        Both intermediate values are logged at debug level.
        """
        p_selfing = self.calc_prob_offspring_given_selfing(mom_g)
        _LOG.debug(''.join(
            [str(self), " from ", str(mom_g), " P(self) = ", str(p_selfing)]))
        p_outcrossing = self.calc_prob_offspring_given_outcrossing(
            allele_freq, mom_g)
        _LOG.debug(''.join(
            [str(self), " from ", str(mom_g), " P(outcross) = ",
             str(p_outcrossing)]))
        selfing_term = (1.0 - outcrossing_prob) * p_selfing
        outcrossing_term = outcrossing_prob * p_outcrossing
        return math.log(selfing_term + outcrossing_term)
Example #17
0
def main():
    """Command-line entry point: select DendroPy test modules and run them.

    Positional arguments name individual tests/modules or one of the
    predefined ``@group`` names. With no selection at all, every test module
    found by ``dendropytest.discover_test_module_paths()`` is run. The
    process exits early (status 0) for ``--help-testgroups`` and
    ``--list-only``, and exits with an error message for an unknown group
    name.
    """
    group_names = (
        ("@all"       , ".*"),
        ("@datamodel" , ".*_datamodel_.*"),
        ("@dataio"    , ".*_dataio_.*"),
        ("@newick"    , ".*_newick_.*"),
        ("@tree"      , ".*_tree_.*"),
        )
    test_group_patterns = collections.OrderedDict(group_names)
    test_group_names = list(test_group_patterns)
    parser = argparse.ArgumentParser()
    # Adjacent string literals are joined with no separator, so every piece
    # except the last must end with an explicit space.
    parser.add_argument("test_names",
            metavar="TEST",
            nargs="*",
            help="Name of test(s) to run. These can be (dot-)qualified module, test "
            "case, or test name (e.g., 'test_module', 'test_module.TestCase1', "
            "'test_module.TestCase1.test1') or special pre-defined groups of "
            "tests (e.g., '@datamodel', '@dataio'). Type '--help-testgroups' for "
            "a list of available groups.")
    parser.add_argument("--help-testgroups",
            action="store_true",
            default=False,
            help="Show list of available test groups and exit.")
    parser.add_argument("--list-only",
            action="store_true",
            default=False,
            help="Do not actually run tests: just print list of test module names and exit.")
    parser.add_argument("-v", "--verbosity",
            default=3,
            type=int,
            help="Messaging noisiness (default: %(default)s)")
    parser.add_argument("--logging-level",
            default=os.environ.get(metavar.LOGGING_LEVEL_ENVAR, "NOTSET"),
            choices=["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
            help="Test logging level (default: '%(default)s')")
    parser.add_argument("-f", "--fail-fast",
            action="store_true",
            default=False,
            help="Stop the test run on the first error or failure.")
    parser.add_argument("-I", "--fail-incomplete",
            action="store_true",
            default=False,
            help="Fail incomplete or partially-complete test stubs.")
    args = parser.parse_args()

    if args.help_testgroups:
        out = sys.stdout
        out.write("Available special test groups:\n")
        for name in test_group_names:
            out.write("  - {}\n".format(name))
        sys.exit(0)

    # Set logging level:
    os.environ[metavar.LOGGING_LEVEL_ENVAR] = args.logging_level
    # The returned logger is not used here; the call is kept (after the
    # environment variable is set) in case it configures logging.
    messaging.get_logger("dendropy")

    # Set test specifications
    if args.fail_incomplete:
        os.environ[metavar.FAIL_INCOMPLETE_TESTS_ENVAR] = "1"

    # get test modules
    test_names = []
    filter_patterns = []
    for name in args.test_names:
        if name is None:
            continue
        if name.startswith("@"):
            # Predefined group: translate to its module-name pattern.
            try:
                filter_patterns.append(test_group_patterns[name])
            except KeyError:
                sys.exit("Unrecognized test group name '{}'. Accepted names: {}".format(name, test_group_names))
        else:
            # Accept file paths as well as dotted names, and qualify the
            # result so that it lives under ``dendropy.test``.
            name = name.replace(os.sep, ".")
            if name.endswith(".py"):
                name = name[:-3]
            if not name.startswith("dendropy.test."):
                if name.startswith("test."):
                    name = "dendropy." + name
                else:
                    name = "dendropy.test." + name
            test_names.append(name)

    if not test_names and not filter_patterns:
        test_names = dendropytest.discover_test_module_paths() # get all
    if filter_patterns:
        test_names.extend(dendropytest.discover_test_module_paths(filter_patterns))
    # De-duplicate and run in a stable order.
    test_names = sorted(set(test_names))

    # 0: nothing
    # 1: errors and mishaps only + 0
    # 2: warnings + 1
    # 3: general messages + 2
    if args.verbosity >= 3 or args.list_only:
        if args.list_only:
            out = sys.stdout
        else:
            out = sys.stderr
        out.write("DendroPy tests to be run:\n")
        for mp in test_names:
            out.write(" + {}\n".format(mp))

    if args.list_only:
        sys.exit(0)

    tests = unittest.defaultTestLoader.loadTestsFromNames(test_names)
    test_suite = unittest.TestSuite(tests)
    test_runner = unittest.TextTestRunner(verbosity=args.verbosity, failfast=args.fail_fast)
    test_runner.run(test_suite)
Example #18
0
import sys
import subprocess
import tempfile
import re
import csv

import dendropy
from dendropy.utility import textprocessing
from dendropy.utility import error
from dendropy.utility import metavar
from dendropy.utility import container
from dendropy.utility import messaging
from dendropy.utility import filesys
from dendropy.utility import processio
from dendropy.dataio import nexuswriter
_LOG = messaging.get_logger(__name__)

import dendropy

PAUP_PATH = os.environ.get(metavar.DENDROPY_PAUP_PATH_ENVAR, "NONE")
if PAUP_PATH == "NONE":
    DENDROPY_PAUP_INTEROPERABILITY = False
else:
    DENDROPY_PAUP_INTEROPERABILITY = True

STANDARD_PREAMBLE = "set warnreset=no increase=auto warnroot=no warnReset=no warnTree=no warnTSave=no warnBlkName=no errorStop=no errorBeep=no queryBeep=no"

class PaupService(object):

    @staticmethod
    def call(
Example #19
0
 def _get_logger(self):
     """Return this object's logger, creating and caching it on first use.

     When ``self._logger`` is missing or ``None``, a logger named after the
     instance's class is requested from ``messaging.get_logger`` and stored
     in ``self._logger``.
     """
     if getattr(self, "_logger", None) is None:
         self._logger = messaging.get_logger(self.__class__.__name__)
     return self._logger
##
##     Sukumaran, J. and M. T. Holder. 2010. DendroPy: a Python library
##     for phylogenetic computing. Bioinformatics 26: 1569-1571.
##
##############################################################################
"""
Support for coverage analysis.
"""

import unittest
import shutil
import sys
from optparse import OptionParser
from dendropy.utility import messaging

_LOG = messaging.get_logger(__name__)

DENDROPY_COVERAGE_ANALYSIS_AVAILABLE = False
try:
    from setuptools import Command
except ImportError:
    _LOG.warn(
        "setuptools.Command could not be imported: setuptools extensions not available"
    )
else:
    try:
        import coverage
    except ImportError:
        _LOG.warn(
            "coverage could not be imported: test coverage analysis not available"
        )
Example #21
0
##
##  If you use this work or any portion thereof in published work,
##  please cite it as:
##
##     Sukumaran, J. and M. T. Holder. 2010. DendroPy: a Python library
##     for phylogenetic computing. Bioinformatics 26: 1569-1571.
##
##############################################################################
"""
Facultative use of NCL for NEXUS parsing.
"""

import os
from dendropy.utility.messaging import get_logger

_LOG = get_logger("dataio.ncl")

DENDROPY_NCL_AVAILABILITY = False
try:
    from nexusclasslib import nclwrapper
    DENDROPY_NCL_AVAILABILITY = True
except ImportError:
    DENDROPY_NCL_AVAILABILITY = False
else:

    import os
    from threading import Thread, Event
    from dendropy import dataobject
    from dendropy.dataio import nexusreader_py
    from dendropy.dataio import nexustokenizer
    from dendropy.utility import iosys