def factorTree(
    node,
    factor_by_capture=None,
    factor_by_field=None,
    factor_by_table=None,
    default=None,
    impute=False,
    patristic=False,
):
    """Assign factors to a tree and optionally impute them.

    At most one factoring source is applied, checked in this order:
    ``factor_by_field``, ``factor_by_capture``, ``factor_by_table``. If none
    is given, the factoring step is skipped. ``alg.setFactorCounts`` is always
    called afterwards. Imputation is optional; ``patristic`` takes precedence
    over ``impute`` when both are set.

    Args:
        node: Tree node handed to the ``smot.algorithm`` functions.
        factor_by_capture: Pattern passed as ``pat`` to ``alg.factorByCapture``.
        factor_by_field: Field number (anything ``int()`` accepts); must be a
            positive integer.
        factor_by_table: Filename passed to ``alg.factorByTable``.
        default: Default factor forwarded to whichever factoring call is used.
        impute: If true (and ``patristic`` is false), call ``alg.imputeFactors``.
        patristic: If true, call ``alg.imputePatristicFactors``.

    Returns:
        The node returned by the last ``smot.algorithm`` call.
    """
    import smot.algorithm as alg

    if factor_by_field is not None:
        try:
            field = int(factor_by_field)
        except (TypeError, ValueError):
            field = None
        # The error message promises a *positive* integer, so reject 0 and
        # negative values as well as values that are not integers at all.
        if field is None or field < 1:
            die(
                f"""Expected a positive integer for field --factor-by-field, got '{factor_by_field}'"""
            )
        node = alg.factorByField(node, field, default=default)
    elif factor_by_capture is not None:
        node = alg.factorByCapture(node, pat=factor_by_capture, default=default)
    elif factor_by_table is not None:
        node = alg.factorByTable(node, filename=factor_by_table, default=default)

    node = alg.setFactorCounts(node)

    if patristic:
        node = alg.imputePatristicFactors(node)
    elif impute:
        node = alg.imputeFactors(node)

    return node
def chooseColorScheme(factors):
    """Pick a default color for each factor.

    The palette is chosen from the number of factors, and colors are assigned
    to factors in the order given.

    Args:
        factors: Sequence of factor names.

    Returns:
        A dict mapping every factor to a hexadecimal color string.

    Calls ``die`` when there are more factors than the largest palette has
    colors, rather than silently leaving some factors without a color.
    """
    # these colors are adapted from Paul Tol's notes here:
    # https://personal.sron.nl/~pault/#sec:qualitative
    n_factors = len(factors)
    if n_factors == 2:
        # orange and blue
        colors = ["#FFA000", "#0000FF"]
    elif n_factors == 3:
        # Paul's three
        colors = ["#004488", "#DDAA33", "#BB5566"]
    elif n_factors <= 6:
        # Paul's medium-contrast 6
        colors = [
            "#5486C0",
            "#053275",
            "#E9C353",
            "#866503",
            "#E7849A",
            "#863144",
        ]
    else:
        # Paul's sunset
        colors = [
            "#2A3789",
            "#3A65A8",
            "#5C95C2",
            "#87BEDA",
            "#E5E8C0",
            "#FCD17A",
            "#FAA353",
            "#F1693C",
            "#D32623",
            "#91001C",
        ]

    # zip() stops at the shorter input, so a palette that is too small would
    # silently drop factors. Guard on the real palette size instead of a
    # hard-coded limit that can drift out of sync with the list above.
    if n_factors > len(colors):
        die(f"I can't handle more than {len(colors)} colors yet")

    return dict(zip(factors, colors))
def para(
    factor_by_capture,
    factor_by_field,
    factor_by_table,
    keep,
    keep_regex,
    default,
    min_tips,
    proportion,
    scale,
    number,
    seed,
    newick,
    zero,
    tree,
):
    """
    Paraphyletic sampling. The sampling algorithm starts at the root and
    descends to the tips. At each node, we store monophyletic subtrees in a
    list and descend into polyphyletic ones (whose leaves have multiple
    factors). If we reach a tip or encounter a monophyletic child of a
    different factor than the stored subtrees, then we stop and sample from
    all tips in the stored trees and initialize a new list with the new
    monophyletic child.
    """
    # Flow: read the tree, factor it with factorTree, sample it with
    # alg.sampleParaphyletic, then print it as Newick (when `newick` is
    # truthy) or Nexus otherwise.
    # NOTE(review): `zero` is accepted but never referenced in this body.
    import smot.algorithm as alg

    # At least one sample-size option must be truthy.
    if not proportion and not scale and not number:
        die("Please add either a --proportion or --scale or --number option")

    parsed = read_tree(tree)

    parsed.tree = factorTree(
        node=parsed.tree,
        factor_by_capture=factor_by_capture,
        factor_by_field=factor_by_field,
        factor_by_table=factor_by_table,
        default=default,
    )

    parsed.tree = alg.sampleParaphyletic(
        parsed.tree,
        keep=keep,
        keep_regex=keep_regex,
        proportion=proportion,
        scale=scale,
        number=number,
        minTips=min_tips,
        seed=seed,
    )

    render = sf.newick if newick else sf.nexus
    print(render(parsed))
def prop(
    factor_by_capture,
    factor_by_field,
    factor_by_table,
    keep,
    keep_regex,
    default,
    min_tips,
    proportion,
    scale,
    number,
    seed,
    newick,
    zero,
    tree,
):
    """
    Proportional sampling. Randomly sample p (0 to 1, from --proportion) tips
    from each monophyletic (relative to factors) subtree. Retain at least N
    tips in each branch (--min-tips).
    """
    # Flow: read the tree, factor it with factorTree, sample it with
    # alg.sampleProportional, then print it as Newick (when `newick` is
    # truthy) or Nexus otherwise.
    # NOTE(review): `zero` is accepted but never referenced in this body.
    import smot.algorithm as alg

    # At least one sample-size option must be truthy.
    if not proportion and not scale and not number:
        die("Please add either a --proportion or --scale or --number option")

    parsed = read_tree(tree)

    parsed.tree = factorTree(
        node=parsed.tree,
        factor_by_capture=factor_by_capture,
        factor_by_field=factor_by_field,
        factor_by_table=factor_by_table,
        default=default,
    )

    parsed.tree = alg.sampleProportional(
        parsed.tree,
        keep=keep,
        keep_regex=keep_regex,
        proportion=proportion,
        scale=scale,
        number=number,
        minTips=min_tips,
        seed=seed,
    )

    render = sf.newick if newick else sf.nexus
    print(render(parsed))
def _readColorMap(path):
    """Parse a TAB-delimited, two-column (factor, color) file into a dict.

    Colors are upper-cased and given a leading "#" when it is missing. Blank
    lines are skipped. Calls ``die`` when a line does not have exactly two
    columns or when a normalized color is not 7 characters long.
    """
    colormap = {}
    with open(path, "r") as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g., a trailing empty line).
                continue
            columns = line.split("\t")
            if len(columns) != 2:
                die("Invalid color map: expected TAB-delimited, two-column file")
            clade, color = columns
            color = color.strip().upper()
            if not color.startswith("#"):
                color = "#" + color
            # Validate the *normalized* value so that "AA10FF" and "#AA10FF"
            # are both accepted and treated identically.
            if len(color) != 7:
                die('Expected colors in hexadecimal (e.g., "#AA10FF")')
            colormap[clade.strip()] = color
    return colormap


def colorBranches(is_para, factor_by_capture, factor_by_field, factor_by_table, colormap, tree):
    """Color the branches of a tree by factor and print it as Nexus.

    Args:
        is_para: If true, color with ``alg.colorPara``; otherwise with
            ``alg.colorMono``.
        factor_by_capture: Forwarded to ``factorTree``.
        factor_by_field: Forwarded to ``factorTree``.
        factor_by_table: Forwarded to ``factorTree``.
        colormap: Path to a TAB-delimited, two-column file mapping factors to
            hexadecimal colors. If falsy, a default scheme is chosen with
            ``chooseColorScheme`` from the sorted factors found in the tree.
        tree: Tree input handed to ``read_tree``.
    """
    import smot.algorithm as alg

    tree = read_tree(tree)
    tree.tree = factorTree(
        node=tree.tree,
        factor_by_capture=factor_by_capture,
        factor_by_field=factor_by_field,
        factor_by_table=factor_by_table,
    )
    # factorTree already calls setFactorCounts; the second call is kept to
    # preserve the original behavior.
    tree.tree = alg.setFactorCounts(tree.tree)

    if colormap:
        _colormap = _readColorMap(colormap)
    else:
        # The factor list is only needed when no color map file is supplied.
        factors = sorted(tree.tree.data.factorCount.keys())
        _colormap = chooseColorScheme(factors)

    if is_para:
        tree.tree = alg.colorPara(tree.tree, colormap=_colormap)
    else:
        tree.tree = alg.colorMono(tree.tree, colormap=_colormap)

    print(sf.nexus(tree))