Exemple #1
0
def factorTree(
    node,
    factor_by_capture=None,
    factor_by_field=None,
    factor_by_table=None,
    default=None,
    impute=False,
    patristic=False,
):
    """
    Assign factors to a tree, count them, and optionally impute missing ones.

    At most one factoring method is applied. When several are supplied the
    precedence is ``factor_by_field``, then ``factor_by_capture``, then
    ``factor_by_table``. When none is supplied the tree goes straight to the
    counting step.

    Args:
        node: Tree node handed to the ``smot.algorithm`` functions.
        factor_by_capture: Pattern forwarded to ``alg.factorByCapture`` as
            ``pat``.
        factor_by_field: Field index forwarded to ``alg.factorByField``. It
            must be convertible to an integer >= 1; anything else is reported
            through ``die``.
        factor_by_table: Filename forwarded to ``alg.factorByTable``.
        default: Forwarded as ``default`` to whichever factoring function is
            selected.
        impute: When true (and ``patristic`` is false), run
            ``alg.imputeFactors`` on the counted tree.
        patristic: When true, run ``alg.imputePatristicFactors``; this takes
            precedence over ``impute``.

    Returns:
        The node returned by the last ``smot.algorithm`` call.
    """
    import smot.algorithm as alg

    if factor_by_field is not None:
        try:
            field = int(factor_by_field)
        except (TypeError, ValueError):
            # TypeError covers values int() cannot even attempt to parse
            # (e.g. a list); both cases get the same user-facing message.
            field = None
        # The message promises a *positive* integer, so zero and negative
        # indices are rejected here rather than being passed downstream.
        if field is None or field < 1:
            die(f"""Expected a positive integer for field --factor-by-field, got '{factor_by_field}'"""
                )
        node = alg.factorByField(node, field, default=default)
    elif factor_by_capture is not None:
        node = alg.factorByCapture(node,
                                   pat=factor_by_capture,
                                   default=default)
    elif factor_by_table is not None:
        node = alg.factorByTable(node,
                                 filename=factor_by_table,
                                 default=default)

    # Factor counts are (re)computed regardless of which method ran.
    node = alg.setFactorCounts(node)

    if patristic:
        node = alg.imputePatristicFactors(node)
    elif impute:
        node = alg.imputeFactors(node)

    return node
Exemple #2
0
def chooseColorScheme(factors):
    """
    Pick a default color for each factor.

    The palette is selected by the number of factors and colors are assigned
    in the order the factors are given.

    Args:
        factors: Sized iterable of hashable factor labels.

    Returns:
        A dict mapping each factor to a "#RRGGBB" color string.

    If there are more factors than the largest palette holds, the problem is
    reported through ``die`` instead of silently leaving factors uncolored.
    """
    # these colors are adapted from Paul Tol's notes here: https://personal.sron.nl/~pault/#sec:qualitative
    n_factors = len(factors)

    if n_factors == 2:
        # orange and blue
        colors = ["#FFA000", "#0000FF"]
    elif n_factors == 3:
        # Paul's three
        colors = ["#004488", "#DDAA33", "#BB5566"]
    elif n_factors <= 6:
        # Paul's medium-contrast 6
        colors = [
            "#5486C0", "#053275", "#E9C353", "#866503", "#E7849A", "#863144"
        ]
    else:
        # Paul's sunset
        colors = [
            "#2A3789",
            "#3A65A8",
            "#5C95C2",
            "#87BEDA",
            "#E5E8C0",
            "#FCD17A",
            "#FAA353",
            "#F1693C",
            "#D32623",
            "#91001C",
        ]
        # The limit is taken from the palette itself so the guard and the
        # message can never disagree with the number of colors available.
        if n_factors > len(colors):
            die(f"I can't handle more than {len(colors)} colors yet")

    return dict(zip(factors, colors))
Exemple #3
0
def para(
    factor_by_capture,
    factor_by_field,
    factor_by_table,
    keep,
    keep_regex,
    default,
    min_tips,
    proportion,
    scale,
    number,
    seed,
    newick,
    zero,
    tree,
):
    """
    Paraphyletic sampling. Starting from the root, the algorithm walks down
    towards the tips. At every node the monophyletic subtrees are collected
    in a list, while polyphyletic subtrees (those whose leaves carry more
    than one factor) are descended into. When a tip is reached, or a
    monophyletic child with a different factor than the collected subtrees
    is found, the tips of the collected subtrees are sampled and a new list
    is started from that monophyletic child.

    One of --proportion, --scale or --number has to be given. The tree is
    read, factored with factorTree, sampled, and printed as newick when
    `newick` is set and as nexus otherwise. `zero` is accepted but not used
    in this function.
    """

    import smot.algorithm as alg

    if not any((proportion, scale, number)):
        die("Please add either a --proportion or --scale or --number option")

    phylo = read_tree(tree)

    factoring = dict(
        factor_by_capture=factor_by_capture,
        factor_by_field=factor_by_field,
        factor_by_table=factor_by_table,
        default=default,
    )
    phylo.tree = factorTree(node=phylo.tree, **factoring)

    sampling = dict(
        keep=keep,
        keep_regex=keep_regex,
        proportion=proportion,
        scale=scale,
        number=number,
        minTips=min_tips,
        seed=seed,
    )
    phylo.tree = alg.sampleParaphyletic(phylo.tree, **sampling)

    # Pick the serializer first, then emit the tree exactly once.
    serialize = sf.newick if newick else sf.nexus
    print(serialize(phylo))
Exemple #4
0
def prop(
    factor_by_capture,
    factor_by_field,
    factor_by_table,
    keep,
    keep_regex,
    default,
    min_tips,
    proportion,
    scale,
    number,
    seed,
    newick,
    zero,
    tree,
):
    """
    Proportional sampling. From every subtree that is monophyletic with
    respect to the factors, randomly sample p tips (p between 0 and 1, taken
    from --proportion). Each branch retains at least N tips (--min-tips).

    One of --proportion, --scale or --number has to be given. The tree is
    read, factored with factorTree, sampled, and printed as newick when
    `newick` is set and as nexus otherwise. `zero` is accepted but not used
    in this function.
    """

    import smot.algorithm as alg

    if not any((proportion, scale, number)):
        die("Please add either a --proportion or --scale or --number option")

    phylo = read_tree(tree)

    factoring = dict(
        factor_by_capture=factor_by_capture,
        factor_by_field=factor_by_field,
        factor_by_table=factor_by_table,
        default=default,
    )
    phylo.tree = factorTree(node=phylo.tree, **factoring)

    sampling = dict(
        keep=keep,
        keep_regex=keep_regex,
        proportion=proportion,
        scale=scale,
        number=number,
        minTips=min_tips,
        seed=seed,
    )
    phylo.tree = alg.sampleProportional(phylo.tree, **sampling)

    # Pick the serializer first, then emit the tree exactly once.
    serialize = sf.newick if newick else sf.nexus
    print(serialize(phylo))
Exemple #5
0
def _readColormap(path):
    """
    Parse a TAB-delimited, two-column (clade, color) file into a dict.

    Blank lines are skipped. Colors are upper-cased and given a leading "#"
    when it is missing; the normalized value must then be "#" followed by six
    hexadecimal digits. Malformed rows and invalid colors are reported
    through ``die``. If a clade appears more than once, the last row wins.
    """
    hex_digits = "0123456789ABCDEF"
    parsed = dict()

    with open(path, "r") as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue

            fields = line.split("\t")
            if len(fields) != 2:
                die("Invalid color map: expected TAB-delimited, two-column file"
                    )
            clade, color = fields

            color = color.strip().upper()
            if not color.startswith("#"):
                color = "#" + color

            # Validate the *normalized* color, so that "AA10FF" and "#AA10FF"
            # are treated the same way.
            if len(color) != 7 or any(ch not in hex_digits
                                      for ch in color[1:]):
                die('Expected colors in hexadecimal (e.g., "#AA10FF")')

            parsed[clade.strip()] = color

    return parsed


def colorBranches(is_para, factor_by_capture, factor_by_field, factor_by_table,
                  colormap, tree):
    """
    Color the branches of a tree by factor and print the result as nexus.

    Args:
        is_para: When true the tree is colored with ``alg.colorPara``,
            otherwise with ``alg.colorMono``.
        factor_by_capture: Forwarded to ``factorTree``.
        factor_by_field: Forwarded to ``factorTree``.
        factor_by_table: Forwarded to ``factorTree``.
        colormap: Path to a TAB-delimited file with two columns (clade and
            hexadecimal color, with or without a leading "#"). When falsy, a
            default scheme is chosen from the factors found in the tree.
        tree: Passed to ``read_tree`` to obtain the tree.
    """
    import smot.algorithm as alg

    tree = read_tree(tree)

    tree.tree = factorTree(
        node=tree.tree,
        factor_by_capture=factor_by_capture,
        factor_by_field=factor_by_field,
        factor_by_table=factor_by_table,
    )
    # factorTree already sets factor counts; the original call is kept so the
    # sequence of smot.algorithm calls is unchanged.
    tree.tree = alg.setFactorCounts(tree.tree)

    if colormap:
        _colormap = _readColormap(colormap)
    else:
        # Sorting gives a stable factor -> color assignment between runs.
        factors = sorted(tree.tree.data.factorCount.keys())
        _colormap = chooseColorScheme(factors)

    if is_para:
        tree.tree = alg.colorPara(tree.tree, colormap=_colormap)
    else:
        tree.tree = alg.colorMono(tree.tree, colormap=_colormap)

    print(sf.nexus(tree))