Example #1
0
    def test_backfill_names_gap(self):
        """backfill_names_gap should record the names missing at each node"""
        consensus_tree = DndParser("(((s1,s2)g1,(s3,s4)g2,(s5,s6)g3)f1)o1;")
        rank_lookup = {'s':6,'g':5,'f':4,'o':3,'c':2,'p':1,'k':0}
        # Rank every consensus node by the first letter of its name and index
        # the nodes by name in the same pass.
        lookup = {}
        for cons_node in consensus_tree.traverse(include_self=True):
            cons_node.Rank = rank_lookup[cons_node.Name[0]]
            lookup[cons_node.Name] = cons_node

        #exp = "((((1)s1,(2)s2)g1,((3)'g2; s3',(4)'g3; s5')))'o1; f1'"
        t = DndParser("((((1)s1,(2)s2),((3)s3,(4)s5)))o1;")

        def collect_nodes():
            """Return (unnamed internal nodes, named rank-6 nodes) of t"""
            top = t.Children[0]
            first, second = top.Children[0], top.Children[1]
            named = [first.Children[0], first.Children[1],
                     second.Children[0], second.Children[1]]
            return [top, first, second], named

        unnamed_nodes, named_nodes = collect_nodes()
        t.Rank = 3
        for node in unnamed_nodes:
            node.Rank = None
        for node in named_nodes:
            node.Rank = 6

        backfill_names_gap(t, lookup)

        # Walk the tree again so the checks look at its current state.
        unnamed_nodes, named_nodes = collect_nodes()
        self.assertEqual(t.BackFillNames, ['o1'])
        for node in unnamed_nodes:
            self.assertEqual(node.BackFillNames, [])
        expected_fill = [['f1','g1','s1'],
                         ['f1','g1','s2'],
                         ['f1','g2','s3'],
                         ['f1','g3','s5']]
        for node, exp in zip(named_nodes, expected_fill):
            self.assertEqual(node.BackFillNames, exp)
Example #2
0
 def test_score_tree(self):
     """score_tree should return the weighted mean of the rank name scores"""
     # set RankNames and RankNameScores
     # if name in RankNames, check score, look at tips, etc
     t = DndParser("(((a,b),(c,d))e,(f,g),h)i;")
     first_child, second_child = t.Children[0], t.Children[1]

     t.RankNames = ['i',None,None,None] # 1.0 * 6
     t.RankNameScores = [1.0,None,None,None]
     first_child.RankNames = [None,'e','foo',None] # 0.5 * 3, 0.6 * 3
     first_child.RankNameScores = [None, 0.5, 0.6, None]
     for grandchild in (first_child.Children[0], first_child.Children[1]):
         grandchild.RankNames = [None] * 7
     second_child.RankNames = [None] * 7
     second_child.RankNameScores = [None] * 7

     # one consensus list per tip, in tip order
     tip_consensus = [
         [None] * 7,
         [1,3,None,None],
         [2,4,5,None],
         [None,1,None,None],
         [None,1,None,None],
         [2,None,3,None],
         [None,4,None,None],
     ]
     for tip, consensus in zip(t.tips(), tip_consensus):
         tip.Consensus = consensus

     decorate_ntips(t)
     weighted_scores = (1.0 * 6) + (0.5 * 3) + (0.6 * 3)
     total_weight = 6 + 3 + 3
     self.assertEqual(score_tree(t), weighted_scores / total_weight)
Example #3
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree built by Clustalw from Alignment object aln.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    moltype: cogent.core.moltype.MolType object. Must compare equal to DNA,
    RNA or PROTEIN, otherwise ValueError is raised.

    best_tree: if True (default:False), uses a slower but more accurate
    algorithm to build the tree (bootstrap, seed and bootlabels are switched
    on unless they are already present in params).

    params: dict of parameters to pass in to the Clustal app controller.
    The dict is copied, so the caller's object is never modified.

    The result will be an cogent.core.tree.PhyloNode object.
    """
    # Work on a private copy so the "-type" entry added below does not leak
    # into the caller's dict.
    params = {} if params is None else dict(params)

    # params is handed to the controller when it is constructed, so the
    # sequence type must be filled in before the app is created.
    if moltype == DNA or moltype == RNA:
        params["-type"] = "d"
    elif moltype == PROTEIN:
        params["-type"] = "p"
    else:
        raise ValueError("moltype must be DNA, RNA, or PROTEIN")

    # Create instance of app controller, enable tree, disable alignment
    app = Clustalw(InputHandler="_input_as_multiline_string", params=params, WorkingDir="/tmp")
    app.Parameters["-align"].off()

    # best_tree -> bootstrap
    if best_tree:
        if "-bootstrap" not in params:
            app.Parameters["-bootstrap"].on(1000)
        if "-seed" not in params:
            app.Parameters["-seed"].on(randint(0, 1000))
        if "-bootlabels" not in params:
            app.Parameters["-bootlabels"].on("nodes")
    else:
        app.Parameters["-tree"].on()

    # Setup mapping. Clustalw clips identifiers. We will need to remap them.
    seq_collection = SequenceCollection(aln)
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map)

    # Collect result
    result = app(int_map.toFasta())
    try:
        # Build tree and restore the original sequence names on the tips
        tree = DndParser(result["Tree"].read(), constructor=PhyloNode)
        for node in tree.tips():
            node.Name = int_keys[node.Name]
    finally:
        # Remove the application's output files even if parsing fails
        result.cleanUp()

    return tree
Example #4
0
def get_support_file(group, tree_file, support_file):
    """Write a tab-separated "node<TAB>color" table for the tree in tree_file.

    group: dict mapping node names to a group name; names missing from the
        dict are ignored.
    tree_file: path of the tree file, parsed with DndParser.
    support_file: path of the output file (overwritten).

    Each group gets a color from COLS_BREWER, or "#000000" for every group
    when there are more than 20 distinct groups. A tree node whose member
    names all belong to one group gets that group's color; a node spanning
    several groups is written as 'grey'. Nodes with no grouped member at all
    are left out of the output.
    """
    # Distinct groups, computed once instead of on every loop iteration
    group_names = list(set(group.values()))
    use_palette = len(group_names) <= 20
    color_map = {}
    for ind, group_name in enumerate(group_names):
        if use_palette:
            color_map[group_name] = COLS_BREWER[ind]
        else:
            color_map[group_name] = "#000000"

    # Close the tree file as soon as it has been parsed
    with open(tree_file, 'U') as tree_handle:
        t = DndParser(tree_handle, constructor=PhyloNode, unescape_name=True)

    color_dict = {}
    for node, value in t.getNodesDict().items():
        sub_node_groups = set(group.get(name) for name in value.getNodeNames())
        # None marks names that have no group
        sub_node_groups.discard(None)
        if len(sub_node_groups) > 1:
            color_dict[node] = 'grey'
        elif sub_node_groups:
            color_dict[node] = color_map[next(iter(sub_node_groups))]
        # else: nothing under this node is grouped, so there is no color

    with open(support_file, 'w') as out:
        for node, color in color_dict.items():
            out.write('%s\t%s\n' % (node, color))
Example #5
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree built by FastTree from alignment aln.

    aln: alignment object providing getIntMap()
    moltype: must compare equal to DNA, RNA or PROTEIN; anything else raises
        ValueError
    best_tree: if True, the '-slow' parameter is switched on
    params: dict of parameters for the FastTree app controller. The dict is
        copied, so the caller's object is never modified.

    The tree is parsed with DndParser into PhyloNode objects and the tips
    carry the original sequence names.
    """
    # Private copy: the entries added below must not leak into the caller's
    # dict.
    params = {} if params is None else dict(params)

    if moltype == DNA or moltype == RNA:
        params['-nt'] = True
    elif moltype == PROTEIN:
        params['-nt'] = False
    else:
        raise ValueError(
                "FastTree does not support moltype: %s" % moltype.label)

    if best_tree:
        params['-slow'] = True

    #Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = aln.getIntMap()
    #Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map, MolType=moltype)

    app = FastTree(params=params)

    result = app(int_map.toFasta())
    tree = DndParser(result['Tree'].read(), constructor=PhyloNode)
    #remap tip names
    for tip in tree.tips():
        tip.Name = int_keys[tip.Name]

    return tree
Example #6
0
def sort_order(records):
    """Return the record ids grouped by category, largest first per category

    records maps an id to a sequence whose first item is the value the ids
    are ranked by and whose remaining items form the category key, e.g.
    ('named_isolate', True, False). The categories are visited in the tip
    order of the helper tree built below.
    """
    tree = DndParser("(((nosp,sp)named,notnamed)inpref,\
                       ((nosp,sp)named,notnamed)outpref);")
    inpref, outpref = tree.Children[0], tree.Children[1]

    # category key -> tip that collects the records of that category
    lookup = {
        ('named_isolate',True,True): inpref.Children[0].Children[0],
        ('named_isolate',True,False): inpref.Children[0].Children[1],
        ('clone',True,False): inpref.Children[1],
        ('named_isolate',False,True): outpref.Children[0].Children[0],
        ('named_isolate',False,False): outpref.Children[0].Children[1],
        ('clone',False,False): outpref.Children[1],
    }

    for tip in tree.tips():
        tip.LengthsAndIds = []

    for rec_id, fields in records.items():
        bucket = lookup[tuple(fields[1:])]
        bucket.LengthsAndIds.append((fields[0], rec_id))

    order = []
    # tips go left->right
    for tip in tree.tips():
        ranked = sorted(tip.LengthsAndIds)
        ranked.reverse()
        for _, rec_id in ranked:
            order.append(rec_id)

    return order
Example #7
0
    def test_DndParser(self):
        """DndParser should honor unescape_name and round-trip via getNewick"""
        t_str = "(A_a,(B:1.0,C),'D_e':0.5)E;"
        tree_unesc = DndParser(t_str, PhyloNode, unescape_name=True)
        tree_esc = DndParser(t_str, PhyloNode, unescape_name=False)

        # (tree, expected name of first child, expected name of third child)
        cases = ((tree_unesc, 'A a', 'D_e'),
                 (tree_esc, 'A_a', "'D_e'"))
        for tree, first_name, third_name in cases:
            middle = tree.Children[1]
            self.assertEqual(tree.Name, 'E')
            self.assertEqual(tree.Children[0].Name, first_name)
            self.assertEqual(middle.Children[0].Name, 'B')
            self.assertEqual(middle.Children[0].Length, 1.0)
            self.assertEqual(middle.Children[1].Name, 'C')
            self.assertEqual(tree.Children[2].Name, third_name)
            self.assertEqual(tree.Children[2].Length, 0.5)

        # writing without escaping and re-parsing must reproduce each tree
        for tree in (tree_esc, tree_unesc):
            newick = tree.getNewick(with_distances=True, escape_name=False)
            reloaded = DndParser(newick, unescape_name=False)
            self.assertEqual(reloaded.getNewick(with_distances=True),
                             tree.getNewick(with_distances=True))
def check_tree_subset(fasta_labels,
                      tree_fp):
    """ Returns the fasta labels that are not tips of the tree, or True

    fasta_labels:  list of fasta labels
    tree_fp: tree filepath

    Only the part of each label before the first underscore is compared
    against the tip names. Returns True when every label was found,
    otherwise the list of labels that are missing from the tree.
    """

    # Need to get modified fasta labels with underscore stripped
    raw_fasta_labels = set([label.split('_')[0] for label in fasta_labels])

    # Close the tree file as soon as it has been parsed
    with open(tree_fp, "U") as tree_f:
        tree = DndParser(tree_f)

    # Get a set of tree tip names
    tree_tips = set(tree.getTipNames())

    labels_not_in_tips = [curr_label for curr_label in raw_fasta_labels
                          if curr_label not in tree_tips]

    # Return True if all found in tree tips
    if not labels_not_in_tips:
        return True

    return labels_not_in_tips
Example #9
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree built by FastTree from alignment aln.

    aln: alignment object providing getIntMap()
    moltype: must compare equal to DNA, RNA or PROTEIN; anything else raises
        ValueError
    best_tree: if True, the "-slow" parameter is switched on
    params: dict of parameters for the FastTree app controller. The dict is
        copied, so the caller's object is never modified.

    The tree is parsed with DndParser into PhyloNode objects and the tips
    carry the original sequence names.
    """
    # Private copy: the entries added below must not leak into the caller's
    # dict.
    params = {} if params is None else dict(params)

    if moltype == DNA or moltype == RNA:
        params["-nt"] = True
    elif moltype == PROTEIN:
        params["-nt"] = False
    else:
        raise ValueError("FastTree does not support moltype: %s" % moltype.label)

    if best_tree:
        params["-slow"] = True

    # Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = aln.getIntMap()
    # Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map, MolType=moltype)

    app = FastTree(params=params)

    result = app(int_map.toFasta())
    tree = DndParser(result["Tree"].read(), constructor=PhyloNode)
    # remap tip names
    for tip in tree.tips():
        tip.Name = int_keys[tip.Name]

    return tree
    def test_make_distance_based_exclusion_fn(self):
        """make_distance_based_exclusion_fn should return a working function"""
        exclude_similar_strains = make_distance_based_exclusion_fn(0.03)

        # The generated function carries a docstring naming its threshold
        self.assertEqual(
            "Exclude neighbors of tip within 0.030000 branch length units",
            exclude_similar_strains.__doc__)

        def newick_after_exclusion(tree, tip_name):
            """Apply the exclusion function around tip_name, return Newick"""
            tip = tree.getNodeMatchingName(tip_name)
            pruned = exclude_similar_strains(tip, tree)
            return pruned.getNewick(with_distances=True)

        # Test that the function works
        self.assertEqual(
            newick_after_exclusion(self.SimpleTree.deepcopy(), "C"),
            "(A:0.02,B:0.01)root;")

        # Test on a tree where a single node will remain
        single_survivor_tree = DndParser(
            "((A:0.02,B:0.01)E:0.05,(C:0.06,D:0.01)F:0.05)root;")
        self.assertEqual(
            newick_after_exclusion(single_survivor_tree, "D"),
            "((A:0.02,B:0.01)E:0.05,C:0.11)root;")

        # Test that we raise if distance is too large
        whole_tree = self.SimpleTree.deepcopy()
        exclude_everything = make_distance_based_exclusion_fn(300.0)
        target = whole_tree.getNodeMatchingName("C")
        self.assertRaises(ValueError, exclude_everything, target, whole_tree)
def check_tree_subset(fasta_labels, tree_fp):
    """ Returns the fasta labels that are not tips of the tree, or True

    fasta_labels:  list of fasta labels
    tree_fp: tree filepath

    Only the part of each label before the first underscore is compared
    against the tip names. Returns True when every label was found,
    otherwise the list of labels that are missing from the tree.
    """

    # Need to get modified fasta labels with underscore stripped
    raw_fasta_labels = set([label.split('_')[0] for label in fasta_labels])

    # Close the tree file as soon as it has been parsed
    with open(tree_fp, "U") as tree_f:
        tree = DndParser(tree_f)

    # Get a set of tree tip names
    tree_tips = set(tree.getTipNames())

    labels_not_in_tips = [curr_label for curr_label in raw_fasta_labels
                          if curr_label not in tree_tips]

    # Return True if all found in tree tips
    if not labels_not_in_tips:
        return True

    return labels_not_in_tips
Example #12
0
 def test_bifurcating(self):
     """Coerces nodes to have <= 2 children"""
     t_str = "((a:1,b:2,c:3)d:4,(e:5,f:6,g:7)h:8,(i:9,j:10,k:11)l:12)m:14;"
     t = DndParser(t_str)

     # can't break up easily... sorry 80char
     exp_str = "((a:1.0,(b:2.0,c:3.0):0.0)d:4.0,((e:5.0,(f:6.0,g:7.0):0.0)h:8.0,(i:9.0,(j:10.0,k:11.0):0.0)l:12.0):0.0)m:14.0;"
     obs = t.bifurcating()
     # Compare the result with the expected Newick string; without this
     # check the test could never fail.
     self.assertEqual(obs.getNewick(with_distances=True), exp_str)
Example #13
0
    def test_bifurcating(self):
        """Coerces nodes to have <= 2 children"""
        t_str = "((a:1,b:2,c:3)d:4,(e:5,f:6,g:7)h:8,(i:9,j:10,k:11)l:12)m:14;"
        t = DndParser(t_str)

        # can't break up easily... sorry 80char
        exp_str = "((a:1.0,(b:2.0,c:3.0):0.0)d:4.0,((e:5.0,(f:6.0,g:7.0):0.0)h:8.0,(i:9.0,(j:10.0,k:11.0):0.0)l:12.0):0.0)m:14.0;"
        obs = t.bifurcating()
        # Compare the result with the expected Newick string; without this
        # check the test could never fail.
        self.assertEqual(obs.getNewick(with_distances=True), exp_str)
Example #14
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree built by RAxML from Alignment object aln.

    aln: an xxx.Alignment object, or data that can be used to build one.

    moltype: cogent.core.moltype.MolType object. Only consulted when params
    has no "-m" entry; it must then be DNA, RNA or PROTEIN, otherwise
    ValueError is raised.

    best_tree: best_tree support is currently not implemented; passing a
    true value raises NotImplementedError.

    params: dict of parameters to pass in to the RAxML app controller. The
    dict is copied, so neither the caller's object nor a shared default is
    modified. The "-w", "-n", "-k", "-p" and "-x" entries are always
    overwritten.

    The result is the tree parsed from the RAxML 'Bootstrap' output
    (PhyloNode objects), with the original sequence names on the tips.
    """
    if best_tree:
        raise NotImplementedError

    # Private copy: the entries written below must not accumulate in the
    # caller's dict or carry over between calls.
    params = {} if params is None else dict(params)

    if '-m' not in params:
        if moltype == DNA or moltype == RNA:
            #params["-m"] = 'GTRMIX'
            # in version 7.2.3, GTRMIX is no longer supported but says GTRCAT
            # behaves like GTRMIX (http://www.phylo.org/tools/raxmlhpc2.html)
            params["-m"] = 'GTRGAMMA'
        elif moltype == PROTEIN:
            # NOTE(review): 'matrixName' looks like a placeholder for a
            # substitution matrix name -- confirm RAxML accepts this value.
            params["-m"] = 'PROTGAMMAmatrixName'
        else:
            raise ValueError("Moltype must be either DNA, RNA, or PROTEIN")

    if not hasattr(aln, 'toPhylip'):
        aln = Alignment(aln)
    seqs, align_map = aln.toPhylip()

    # generate temp filename for output
    params["-w"] = "/tmp/"
    params["-n"] = get_tmp_filename().split("/")[-1]
    params["-k"] = True
    params["-p"] = randint(1, 100000)
    params["-x"] = randint(1, 100000)

    ih = '_input_as_multiline_string'

    raxml_app = Raxml(params=params,
                      InputHandler=ih,
                      WorkingDir=None,
                      SuppressStderr=True,
                      SuppressStdout=True)

    raxml_result = raxml_app(seqs)
    try:
        tree = DndParser(raxml_result['Bootstrap'], constructor=PhyloNode)

        # restore the original sequence names
        for node in tree.tips():
            node.Name = align_map[node.Name]
    finally:
        # remove the output files even if parsing fails
        raxml_result.cleanUp()

    return tree
Example #15
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree built by RAxML from Alignment object aln.

    aln: an xxx.Alignment object, or data that can be used to build one.

    moltype: cogent.core.moltype.MolType object. Only consulted when params
    has no "-m" entry; it must then be DNA, RNA or PROTEIN, otherwise
    ValueError is raised.

    best_tree: best_tree support is currently not implemented; passing a
    true value raises NotImplementedError.

    params: dict of parameters to pass in to the RAxML app controller. The
    dict is copied, so neither the caller's object nor a shared default is
    modified. The "-w", "-n", "-k", "-p" and "-x" entries are always
    overwritten.

    The result is the tree parsed from the RAxML 'Bootstrap' output
    (PhyloNode objects), with the original sequence names on the tips.
    """
    if best_tree:
        raise NotImplementedError

    # Private copy: the entries written below must not accumulate in the
    # caller's dict or carry over between calls.
    params = {} if params is None else dict(params)

    if '-m' not in params:
        if moltype == DNA or moltype == RNA:
            #params["-m"] = 'GTRMIX'
            # in version 7.2.3, GTRMIX is no longer supported but says GTRCAT
            # behaves like GTRMIX (http://www.phylo.org/tools/raxmlhpc2.html)
            params["-m"] = 'GTRGAMMA'
        elif moltype == PROTEIN:
            # NOTE(review): 'matrixName' looks like a placeholder for a
            # substitution matrix name -- confirm RAxML accepts this value.
            params["-m"] = 'PROTGAMMAmatrixName'
        else:
            raise ValueError("Moltype must be either DNA, RNA, or PROTEIN")

    if not hasattr(aln, 'toPhylip'):
        aln = Alignment(aln)
    seqs, align_map = aln.toPhylip()

    # generate temp filename for output
    params["-w"] = "/tmp/"
    params["-n"] = get_tmp_filename().split("/")[-1]
    params["-k"] = True
    params["-p"] = randint(1,100000)
    params["-x"] = randint(1,100000)

    ih = '_input_as_multiline_string'

    raxml_app = Raxml(params=params,
                      InputHandler=ih,
                      WorkingDir=None,
                      SuppressStderr=True,
                      SuppressStdout=True)

    raxml_result = raxml_app(seqs)
    try:
        tree = DndParser(raxml_result['Bootstrap'], constructor=PhyloNode)

        # restore the original sequence names
        for node in tree.tips():
            node.Name = align_map[node.Name]
    finally:
        # remove the output files even if parsing fails
        raxml_result.cleanUp()

    return tree
Example #16
0
def bootstrap_tree_from_alignment(aln, seed=None, num_trees=None, params=None):
    """Build a tree with bootstrap support values from aln using Clustalw.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    seed: an integer seed value. Only used when '-seed' was not switched on
    through params; if it is None, randint(0,1000) supplies the seed.

    num_trees: an integer, number of trees to bootstrap against. Only used
    when '-bootstrap' was not switched on through params; defaults to 1000.

    params: dict of parameters to pass in to the Clustal app controller.

    Returns the tree parsed into PhyloNode objects, with the original
    sequence names restored on the tips.
    """
    # Controller with the alignment and plain tree steps disabled
    app = Clustalw(InputHandler='_input_as_multiline_string', params=params,
                   WorkingDir='/tmp')
    app.Parameters['-align'].off()
    app.Parameters['-tree'].off()

    # Fill in the bootstrap settings that params left switched off
    if app.Parameters['-bootstrap'].isOff():
        replicate_count = 1000 if num_trees is None else num_trees
        app.Parameters['-bootstrap'].on(replicate_count)

    if app.Parameters['-seed'].isOff():
        chosen_seed = randint(0,1000) if seed is None else seed
        app.Parameters['-seed'].on(chosen_seed)

    if app.Parameters['-bootlabels'].isOff():
        app.Parameters['-bootlabels'].on("node")

    # Clustalw clips identifiers, so it is run on integer-mapped names
    seq_collection = SequenceCollection(aln)
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map)
    result = app(int_map.toFasta())

    # Parse the tree and put the original names back on the tips
    tree = DndParser(result['Tree'].read(), constructor=PhyloNode)
    for tip in tree.tips():
        tip.Name = int_keys[tip.Name]

    result.cleanUp()
    del(seq_collection, app, result, int_map, int_keys)

    return tree
Example #17
0
def bootstrap_tree_from_alignment(aln, seed=None, num_trees=None, params=None):
    """Build a tree with bootstrap support values from aln using Clustalw.

    aln: an cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    seed: an integer seed value. Only used when '-seed' was not switched on
    through params; if it is None, randint(0, 1000) supplies the seed.

    num_trees: an integer, number of trees to bootstrap against. Only used
    when '-bootstrap' was not switched on through params; defaults to 1000.

    params: dict of parameters to pass in to the Clustal app controller.

    Returns the tree parsed into PhyloNode objects, with the original
    sequence names restored on the tips.
    """
    # Controller with the alignment and plain tree steps disabled
    app = Clustalw(InputHandler='_input_as_multiline_string', params=params,
                   WorkingDir='/tmp')
    app.Parameters['-align'].off()
    app.Parameters['-tree'].off()

    # Fill in the bootstrap settings that params left switched off
    if app.Parameters['-bootstrap'].isOff():
        replicate_count = 1000 if num_trees is None else num_trees
        app.Parameters['-bootstrap'].on(replicate_count)

    if app.Parameters['-seed'].isOff():
        chosen_seed = randint(0, 1000) if seed is None else seed
        app.Parameters['-seed'].on(chosen_seed)

    if app.Parameters['-bootlabels'].isOff():
        app.Parameters['-bootlabels'].on("node")

    # Clustalw clips identifiers, so it is run on integer-mapped names
    seq_collection = SequenceCollection(aln)
    int_map, int_keys = seq_collection.getIntMap()
    int_map = SequenceCollection(int_map)
    result = app(int_map.toFasta())

    # Parse the tree and put the original names back on the tips
    tree = DndParser(result['Tree'].read(), constructor=PhyloNode)
    for tip in tree.tips():
        tip.Name = int_keys[tip.Name]

    result.cleanUp()
    del (seq_collection, app, result, int_map, int_keys)

    return tree
Example #18
0
 def test_get_nearest_named_ancestor(self):
     """get_nearest_named_ancestor finds the named root, or None without one"""
     named_root = DndParser("(((s1,s2)g1,s3))root;")
     unnamed_root = DndParser("(((s1,s2)g1,s3));")

     obs_named = get_nearest_named_ancestor(
         named_root.getNodeMatchingName('s3'))
     obs_unnamed = get_nearest_named_ancestor(
         unnamed_root.getNodeMatchingName('s3'))

     # s3 sits outside g1, so only the root can be its named ancestor
     self.assertEqual(obs_named, named_root)
     self.assertEqual(obs_unnamed, None)
Example #19
0
 def test_reroot(self):
     """reroot should place the new root next to the given tips"""
     t = DndParser("(((a,b)c,(d,e)f)g,(h,i)j);")
     for node in t.traverse():
         node.Length = 1.0

     # note, g is lost because it has a single descendent and gets pruned off
     expected = "((a:1.0,b:1.0)c:0.5,((d:1.0,e:1.0)f:1.0,(h:1.0,i:1.0)j:2.0):0.5);"
     rerooted = reroot(t, ['a','b'])
     self.assertEqual(rerooted.getNewick(with_distances=True), expected)
Example #20
0
def raxml_alignment(align_obj,
                 raxml_model="GTRCAT",
                 params=None,
                 SuppressStderr=True,
                 SuppressStdout=True):
    """Run raxml on alignment object

    align_obj: Alignment object (must provide toPhylip())
    raxml_model: value used for the "-m" parameter
    params: you can set any params except -w, -n, -m and -p, which are
        always overwritten here. The dict is copied, so neither the caller's
        object nor a shared default is modified.
    SuppressStderr, SuppressStdout: passed through to the Raxml controller

    returns: tuple (phylonode,
                    parsimonyphylonode,
                    log likelihood,
                    total exec time)

    The log likelihood is taken from the last line of the log output and
    the total exec time is the sum of the first column over all log lines.
    """
    # Private copy: the entries written below must not accumulate in the
    # caller's dict or carry over between calls.
    params = {} if params is None else dict(params)

    # generate temp filename for output
    params["-w"] = "/tmp/"
    params["-n"] = get_tmp_filename().split("/")[-1]
    params["-m"] = raxml_model
    params["-p"] = randint(1,100000)
    ih = '_input_as_multiline_string'
    seqs, align_map = align_obj.toPhylip()

    # set up command
    raxml_app = Raxml(
                   params=params,
                   InputHandler=ih,
                   WorkingDir=None,
                   SuppressStderr=SuppressStderr,
                   SuppressStdout=SuppressStdout)

    # run raxml
    ra = raxml_app(seqs)
    try:
        # generate tree
        tree_node = DndParser(ra["Result"])

        # generate parsimony tree
        parsimony_tree_node = DndParser(ra["ParsimonyTree"])

        # extract log likelihood from log file; every line is expected to
        # hold two numbers: exec time and log likelihood
        total_exec_time = exec_time = log_likelihood = 0.0
        for line in ra["Log"]:
            exec_time, log_likelihood = map(float, line.split())
            total_exec_time += exec_time
    finally:
        # remove output files, even if parsing failed
        ra.cleanUp()

    return tree_node, parsimony_tree_node, log_likelihood, total_exec_time
Example #21
0
 def test_build_tree_from_alignment(self):
     """build_tree_from_alignment should match one of the expected trees"""
     tree = build_tree_from_alignment(self.seqs, DNA)

     def assert_matches(expected_newick):
         """Compare names and branch lengths node by node"""
         expected_nodes = DndParser(expected_newick).traverse()
         for obs_node, exp_node in zip(tree.traverse(), expected_nodes):
             self.assertEqual(obs_node.Name, exp_node.Name)
             self.assertFloatEqual(obs_node.Length, exp_node.Length)

     # test expected output for fasttree 1.1 and 2.0.1
     try:
         assert_matches(exp_tree)
     except AssertionError:
         assert_matches(exp_tree_201)
Example #22
0
def assign_tax_labels_to_tree(tree, std):
    """Appends the taxonomy string to every tip label of tree
        tree : newick string
        std : output from shorten_taxonomy_strings, indexed by tip label

    Returns the parsed tree; each tip is renamed to "<label>_<taxonomy>".
    """
    parsed = DndParser(tree, PhyloNode)
    for tip in parsed.tips():
        # drop surrounding quote characters before the lookup
        bare_label = tip.Name.strip("'")
        tip.Name = str(bare_label) + '_' + std[bare_label]
    return parsed
Example #23
0
 def test_data(self):
     """DndParser should work as expected on real data"""
     # tree without internal node names
     plain = str(DndParser(sample))
     self.assertEqual(
         plain,
         '((xyz:0.28124,(def:0.24498,mno:0.03627):0.1771):0.0487,abc:0.05925,(ghi:0.06914,jkl:0.13776):0.09853);'
     )
     # tree whose internal nodes carry names
     labelled = str(DndParser(node_data_sample, unescape_name=True))
     self.assertEqual(
         labelled,
         "((xyz:0.28124,(def:0.24498,mno:0.03627)A:0.1771)B:0.0487,abc:0.05925,(ghi:0.06914,jkl:0.13776)C:0.09853);"
     )
def assign_tax_labels_to_tree(tree,std):
    """Appends the taxonomy string to every tip label of tree
        tree : newick string
        std : output from shorten_taxonomy_strings, indexed by tip label

    Returns the parsed tree; each tip is renamed to "<label>_<taxonomy>".
    """
    parsed = DndParser(tree, PhyloNode)
    for tip in parsed.tips():
        # drop surrounding quote characters before the lookup
        bare_label = tip.Name.strip("'")
        tip.Name = str(bare_label) + '_' + std[bare_label]
    return parsed
Example #25
0
    def test_getsubtree(self):
        """getSubTree should return the tree reduced to the requested tips"""
        full_newick = '(((Human,HowlerMon),Mouse),NineBande,DogFaced);'
        reduced_newick = '((Mouse,HowlerMon),NineBande,DogFaced);'
        wanted = ['NineBande', 'Mouse', 'HowlerMon', 'DogFaced']

        full_tree = DndParser(full_newick, constructor = PicrustNode)
        observed = full_tree.getSubTree(wanted)
        expected = DndParser(reduced_newick, constructor = PicrustNode)

        # same number of children at the root and the same Newick output
        self.assertEqual(len(observed.Children), len(expected.Children))
        self.assertEqual(observed.getNewick(), expected.getNewick())
def remove_taxonomy(tree, regex_string):
    """Deletes everything matching regex_string from the tip labels of tree
        tree : tree data that DndParser can parse
        regex_string : regular expression; each match is removed from the
            tip names

    Returns the parsed tree (PhyloNode objects) with the edited tip names.
    """
    # Compile once: the pattern is the same for every tip.
    pattern = re.compile(regex_string)
    tree_nodes = DndParser(tree, PhyloNode)
    for node in tree_nodes.tips():
        label = node.Name.strip('\'') # in case there are actual quotes
        node.Name = pattern.sub('', label)
    return tree_nodes
Example #27
0
def remove_taxonomy(tree, regex_string):
    """Deletes everything matching regex_string from the tip labels of tree
        tree : tree data that DndParser can parse
        regex_string : regular expression; each match is removed from the
            tip names

    Returns the parsed tree (PhyloNode objects) with the edited tip names.
    """
    # Compile once: the pattern is the same for every tip.
    pattern = re.compile(regex_string)
    tree_nodes = DndParser(tree, PhyloNode)
    for node in tree_nodes.tips():
        label = node.Name.strip('\'')  # in case there are actual quotes
        node.Name = pattern.sub('', label)
    return tree_nodes
Example #28
0
 def test_gnodedata(self):
     """DndParser should assign Name to internal nodes correctly"""
     t = DndParser(nodedata)
     self.assertEqual(len(t), 2)
     terminal, internal = t[0], t[1]
     self.assertEqual(len(terminal), 0)  #first child is terminal
     self.assertEqual(len(internal), 2)  #second child has two children
     self.assertEqual(str(t), '(abc:3.0,(def:4.0,ghi:5.0)jkl:6.0);')

     # every named node should carry the branch length from the string
     length_by_name = dict((node.Name, node.Length) for node in t.traverse())
     expected_lengths = (('abc', 3.0), ('def', 4.0), ('ghi', 5.0), ('jkl', 6.0))
     for name, length in expected_lengths:
         self.assertEqual(length_by_name[name], length)
Example #29
0
 def test_ascii(self):
     """asciiArt should run without raising for the option combinations"""
     self.tree.asciiArt()
     # unlabeled internal node
     tr = DndParser("(B:0.2,(C:0.3,D:0.4):0.6)F;")
     option_sets = ((True, False), (True, True), (False, False))
     for show_internal, compact in option_sets:
         tr.asciiArt(show_internal=show_internal, compact=compact)
Example #30
0
 def test_gnodedata(self):
     """DndParser should assign Name to internal nodes correctly"""
     t = DndParser(nodedata)
     self.assertEqual(len(t), 2)
     terminal, internal = t[0], t[1]
     self.assertEqual(len(terminal), 0)  #first child is terminal
     self.assertEqual(len(internal), 2)  #second child has two children
     self.assertEqual(str(t), '(abc:3.0,(def:4.0,ghi:5.0)jkl:6.0);')

     # every named node should carry the branch length from the string
     length_by_name = dict((node.Name, node.Length) for node in t.traverse())
     expected_lengths = (('abc', 3.0), ('def', 4.0), ('ghi', 5.0), ('jkl', 6.0))
     for name, length in expected_lengths:
         self.assertEqual(length_by_name[name], length)
    def test_join_nodes(self):
        """join them nodes! (((99 + 97) + 94) + 91) + ..."""
        # (clusters, second make_nodes argument, third make_nodes argument)
        levels = ((self.clst_99, 0.01, 99),
                  (self.clst_97, 0.02, 97),
                  (self.clst_94, 0.03, 94))
        parsed = [make_nodes(clusters, dist, level)
                  for clusters, dist, level in levels]

        exp = """((((3:.005)99_2_3:.01,(8:.005,7:.005)99_3_8:.01)97_0_3:.015)94_0_3,
                 (((1:.005,6:.005)99_1_1:.01)97_1_1:.015,
                 ((10:.005,20:.005,30:.005)99_0_10:.01)97_2_10:.015)94_1_1);"""
        expected_newick = DndParser(exp).getNewick(with_distances=True)
        joined = join_nodes(parsed)

        self.assertEqual(joined.getNewick(with_distances=True),
                         expected_newick)
Example #32
0
def build_tree_from_distance_matrix(matrix, best_tree=False, params=None, working_dir="/tmp"):
    """Returns a tree from a distance matrix.

    matrix: a square Dict2D object (cogent.util.dict2d)

    best_tree: if True (default:False), uses a slower but more accurate
    algorithm to build the tree (the "-N" parameter is switched on).

    params: dict of parameters to pass in to the Clearcut app controller.
    The dict is copied, so neither the caller's object nor a shared default
    is modified. The "--out" entry is always overwritten.

    working_dir: directory used for the temporary output file and as the
    controller's working directory.

    The result will be an cogent.core.tree.PhyloNode object with the
    original names restored on the tips.
    """
    # Private copy: "--out" must not accumulate in the caller's dict or
    # carry over between calls.
    params = {} if params is None else dict(params)
    params["--out"] = get_tmp_filename(working_dir)

    # Create instance of app controller, enable tree, disable alignment
    app = Clearcut(
        InputHandler="_input_as_multiline_string",
        params=params,
        WorkingDir=working_dir,
        SuppressStdout=True,
        SuppressStderr=True,
    )
    # Turn off input as alignment
    app.Parameters["-a"].off()
    # Input is a distance matrix
    app.Parameters["-d"].on()

    if best_tree:
        app.Parameters["-N"].on()

    # Turn the dict2d object into the expected input format
    matrix_input, int_keys = _matrix_input_from_dict2d(matrix)

    # Collect result
    result = app(matrix_input)
    try:
        # Build tree
        tree = DndParser(result["Tree"].read(), constructor=PhyloNode)

        # reassign to original names
        for node in tree.tips():
            node.Name = int_keys[node.Name]
    finally:
        # Clean up the output files even if parsing fails
        result.cleanUp()

    return tree
Example #33
0
    def test_str(self):
        """RangeNode should round-trip Newick string correctly."""
        self.assertEqual(str(RangeNode()), '()')

        # with branch lengths: output equals the input minus its whitespace
        with_lengths = DndParser(self.sample_tree_string, RangeNode)
        flattened = self.sample_tree_string.replace('\n', '').replace(' ', '')
        self.assertEqual(str(with_lengths), flattened)

        # without branch lengths the string comes back unchanged
        without_lengths = DndParser(self.sample_string_2, RangeNode)
        self.assertEqual(str(without_lengths), self.sample_string_2)
Example #34
0
 def test_shuffle_tipnames(self):
     """shuffle_tipnames should return copy of tree w/ labels permuted"""
     # Stochastic: a shuffle may reproduce the original order (5! is 120),
     # so the check is attempted up to 5 times before the test gives up.
     for _ in range(5):
         try:
             original = DndParser(self.t_str)
             shuffled = shuffle_tipnames(original)
             self.assertIsPermutation(
                 [tip.Name for tip in original.tips()],
                 [tip.Name for tip in shuffled.tips()])
         except AssertionError:
             continue
         else:
             return
     raise AssertionError("Produced same permutation in 5 tries: broken?")
 def test_shuffle_tipnames(self):
     """shuffle_tipnames should return copy of tree w/ labels permuted"""
     #Note: this should never fail but is technically still stochastic
     #5! is 120 so repeating 5 times should fail about 1 in 10^10.
     for i in range(5):
         try:
             t = DndParser(self.t_str)
             result = shuffle_tipnames(t)
             orig_names = [n.Name for n in t.tips()]
             new_names = [n.Name for n in result.tips()]
             self.assertIsPermutation(orig_names, new_names)
             return
         except AssertionError:
             continue
     # Call form of raise: valid in both Python 2 and Python 3
     raise AssertionError("Produced same permutation in 5 tries: broken?")
Example #36
0
def load_tree(input, tipname_map, verbose=False):
    """Returns a PhyloNode tree decorated with helper attrs

    input: an existing TreeNode (used as-is and modified in place) or
        anything DndParser accepts.
    tipname_map: mapping from tip name to that tip's consensus list.
    verbose: if True, print progress and diagnostic messages.

    Helper attrs include Consensus, TipStart, TipStop and Bootstrap.
    TipStart/TipStop are positions in tree.tips() order: a tip spans only
    its own position, an internal node spans from its first child's TipStart
    to its last child's TipStop. Nontips and tips that do not have consensus
    information will have [None] * len(RANK_ORDER) set as Consensus.

    Internal node names that parse as floats are moved to Bootstrap (and the
    Name is cleared); otherwise Bootstrap is None. Single quotes are removed
    from tip names after the consensus lookup has been done.
    """
    if verbose:
        print("loading tree...")
    if isinstance(input, TreeNode):
        tree = input
    else:
        tree = DndParser(input)

    n_ranks = len(RANK_ORDER)

    for idx, tip in enumerate(tree.tips()):
        tip.TipStart = idx
        tip.TipStop = idx

        # Look up with the raw (possibly quoted) name; quotes are stripped
        # only at the very end of this function.
        consensus = tipname_map.get(tip.Name)
        if consensus is None:
            if verbose:
                print("No consensus for %s" % tip.Name)
            # sized from RANK_ORDER rather than a hard-coded 7 so tips and
            # internal nodes always agree on the consensus length
            consensus = [None] * n_ranks
        tip.Consensus = consensus

    for node in tree.postorder(include_self=True):
        if node.istip():
            continue
        # postorder guarantees the children were decorated already
        node.TipStart = node.Children[0].TipStart
        node.TipStop = node.Children[-1].TipStop
        node.Consensus = [None] * n_ranks

        if node.Name is None:
            node.Bootstrap = None
            continue

        try:
            node.Bootstrap = float(node.Name)
        except (TypeError, ValueError):
            # the name is a real label, not a support value: keep it
            if verbose:
                print("Could not save bootstrap %s, node is root: %s" %
                      (node.Name, str(node.Parent is None)))
            node.Bootstrap = None
        else:
            node.Name = None

    for tip in tree.tips():
        if tip.Name:
            tip.Name = tip.Name.replace("'", "")
    return tree
Example #37
0
    def test_fitch_descendants_missing_data(self):
        """fitch_descendants should work with missing data"""
        #tree and envs for testing missing values
        t_str = '(((a:1,b:2):4,(c:3,d:1):2):1,(e:2,f:1):3);'
        env_str = """a   A
b   B
c   D
d   C
e   C
f   D"""
        t = DndParser(t_str, UniFracTreeNode)
        node_index, nodes = index_tree(t)
        env_counts = count_envs(env_str.split('\n'))

        count_array, unique_envs, env_to_index, node_to_index = \
            index_envs(env_counts, node_index)

        branch_lengths = get_branch_lengths(node_index)
        #test just the AB pair
        ab_counts = count_array[:, 0:2]
        bindings = bind_to_array(nodes, ab_counts)
        changes = fitch_descendants(bindings, counter=FitchCounter)
        self.assertEqual(changes, 1)
        orig_result = ab_counts.copy()
        #check that the original Fitch counter gives the expected
        #incorrect parsimony result
        changes = fitch_descendants(bindings, counter=FitchCounterDense)
        self.assertEqual(changes, 5)
        new_result = ab_counts.copy()
        #check that the two versions fill the array with the same values
        self.assertEqual(orig_result, new_result)
Example #38
0
def main():
    """Prune duplicate inserted nodes from a tree and write the result.

    Reads the tree named by the input_tree option, decorates it according to
    the scoring_method option ('depth' or 'numtips'), locates the nodes for
    the comma-separated tips_to_keep, drops duplicate nodes and writes the
    resulting Newick string (with distances) to output_fp.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    # get options
    tree_fp = opts.input_tree
    tips_to_keep = opts.tips_to_keep.split(',')
    scoring_method = opts.scoring_method

    # load tree; the handle is closed as soon as parsing is done
    # NOTE(review): mode 'U' is kept for the Python 2 behaviour this script
    # was written for; it was removed in Python 3.11.
    with open(tree_fp, 'U') as tree_f:
        tree = DndParser(tree_f, constructor=PhyloNode)

    # decorate measurements onto tree (either by depth or by number of children)
    if scoring_method == 'depth':
        tree2 = decorate_depth(tree)
    elif scoring_method == 'numtips':
        tree2 = decorate_numtips(tree)
    else:
        # previously fell through and died later with a NameError on tree2
        option_parser.error("Unknown scoring method: %s" % scoring_method)

    # get the nodes for the inserted sequences
    nodes_dict = get_insert_dict(tree2, set(tips_to_keep))

    # remove nodes accordingly
    final_tree = drop_duplicate_nodes(tree2, nodes_dict)

    # write out the resulting tree; 'with' closes the file even on error
    with open(opts.output_fp, 'w') as open_outpath:
        open_outpath.write(final_tree.getNewick(with_distances=True))
Example #39
0
def timing(tree_size, num_trees, num_samples):
    """Time EMDUnifrac (with and without flow) against fast_unifrac.

    For each of num_trees randomly populated trees of tree_size leaves,
    num_samples simulated environment sets are generated and each of the
    three methods is timed once on the first two samples.

    Returns a tuple of mean wall-clock times in the order
    (EMDUnifrac, EMDUnifrac with flow, FastUnifrac).
    """
    flow_elapsed = []
    plain_elapsed = []
    fast_elapsed = []

    for _tree_round in range(num_trees):
        random_tree = Tree()
        random_tree.populate(tree_size, random_branches=True)
        newick = random_tree.write(format=1)
        cogent_tree = DndParser(newick, UniFracTreeNode)
        (T, l, nodes_in_order) = EMDU.parse_tree(newick)

        for _sample_round in range(num_samples):
            # FastUnifrac can only take weight on leaf nodes
            envs = EMDU.simulate_data(random_tree.get_leaf_names())
            (envs_prob_dict, samples) = EMDU.parse_envs(envs, nodes_in_order)
            P = envs_prob_dict[samples[0]]
            Q = envs_prob_dict[samples[1]]

            # EMDUnifrac with flow
            started = timeit.default_timer()
            EMDU.EMDUnifrac_weighted_flow(T, l, nodes_in_order, P, Q)
            finished = timeit.default_timer()
            flow_elapsed.append(finished - started)

            # EMDUnifrac no flow
            started = timeit.default_timer()
            EMDU.EMDUnifrac_weighted(T, l, nodes_in_order, P, Q)
            finished = timeit.default_timer()
            plain_elapsed.append(finished - started)

            # FastUnifrac weighted
            started = timeit.default_timer()
            fast_unifrac(cogent_tree, envs, weighted=True,
                         modes=set(['distance_matrix']))
            finished = timeit.default_timer()
            fast_elapsed.append(finished - started)

    mean_plain = np.array(plain_elapsed).mean()
    mean_flow = np.array(flow_elapsed).mean()
    mean_fast = np.array(fast_elapsed).mean()
    return (mean_plain, mean_flow, mean_fast)
Example #40
0
def convert_tree_tips(align_map, tree_fp):
    """Rename the starting tree's tips to the new phylip names.

    align_map: dict whose values are the current tree tip names and whose
        keys are the names that should replace them.
    tree_fp: path of the Newick tree file to load.

    Returns the parsed PhyloNode tree with every tip renamed. Raises KeyError
    if a tip name does not occur among the values of align_map.
    """
    # flip key value pairs so tips can be looked up by their current name
    tree_tip_to_seq_name = dict(
        (tip_name, seq_name) for seq_name, tip_name in align_map.items())

    # parse inside 'with' so the file handle is closed (it used to leak)
    with open(tree_fp) as open_tree:
        tree = DndParser(open_tree, constructor=PhyloNode)

    # change the tip labels to phylip labels
    for node in tree.tips():
        node.Name = tree_tip_to_seq_name[node.Name]

    return tree
Example #41
0
 def test_gonenest(self):
     """DndParser should cope with a single level of nesting"""
     tree = DndParser(onenest)
     self.assertEqual(len(tree), 2)
     # child 0 is terminal, child 1 carries two children of its own
     for position, expected_size in ((0, 0), (1, 2)):
         self.assertEqual(len(tree[position]), expected_size)
     rendered = str(tree)
     self.assertEqual(rendered, '(abc:3.0,(def:4.0,ghi:5.0):6.0);')
Example #42
0
def main():
    """Run simsam_range_to_files using the command line options.

    Creates the output directory, loads the OTU table and tree, optionally
    opens the mapping file, and passes everything to simsam_range_to_files
    together with the comma-separated sample sizes (num option) and
    dissimilarities (dissim option).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_dir = opts.output_dir
    create_dir(output_dir)

    otu_table_fp = opts.otu_table
    otu_table = load_table(otu_table_fp)

    with open(opts.tree_file, 'U') as tree_fh:
        tree = DndParser(tree_fh)

    mapping_fp = opts.mapping_fp
    if mapping_fp:
        mapping_f = open(mapping_fp, 'U')
        input_map_basename = splitext(split(mapping_fp)[1])[0]
    else:
        mapping_f = None
        input_map_basename = None

    input_table_basename = splitext(split(otu_table_fp)[1])[0]

    # Real lists rather than map(): on Python 3 map() yields a one-shot
    # iterator, which would be exhausted if iterated more than once.
    simulated_sample_sizes = [int(n) for n in opts.num.split(',')]
    dissimilarities = [float(d) for d in opts.dissim.split(',')]

    try:
        simsam_range_to_files(otu_table,
                              tree,
                              simulated_sample_sizes=simulated_sample_sizes,
                              dissimilarities=dissimilarities,
                              output_dir=output_dir,
                              mapping_f=mapping_f,
                              output_table_basename=input_table_basename,
                              output_map_basename=input_map_basename)
    finally:
        # the mapping file used to be left open
        if mapping_f is not None:
            mapping_f.close()
Example #43
0
    def test_join_nodes(self):
        """join_nodes should nest the cluster levels: ((99 + 97) + 94)"""
        level_specs = (
            (self.clst_99, 0.01, 99),
            (self.clst_97, 0.02, 97),
            (self.clst_94, 0.03, 94),
        )
        parsed = [make_nodes(clusters, dist, level)
                  for clusters, dist, level in level_specs]

        expected_newick = """((((3:.005)99_2_3:.01,(8:.005,7:.005)99_3_8:.01)97_0_3:.015)94_0_3,
                 (((1:.005,6:.005)99_1_1:.01)97_1_1:.015,
                 ((10:.005,20:.005,30:.005)99_0_10:.01)97_2_10:.015)94_1_1);"""
        expected_tree = DndParser(expected_newick)
        observed_tree = join_nodes(parsed)

        # compare the serialised forms, branch lengths included
        observed_text = observed_tree.getNewick(with_distances=True)
        expected_text = expected_tree.getNewick(with_distances=True)
        self.assertEqual(observed_text, expected_text)
Example #44
0
def wagner_for_picrust(tree_path,
                       trait_table_path,
                       gain=None,
                       max_paralogs=None,
                       HALT_EXEC=False):
    '''Runs count application controller given path of tree and trait table and returns a Table

    tree_path: path of the tree file handed to Count (also parsed here to
        count its tips).
    trait_table_path: path of a tab-separated trait table with a header row.
    gain: if truthy, value passed to Count's -gain parameter.
    max_paralogs: if truthy, value passed to Count's -max_paralogs parameter.
    HALT_EXEC: forwarded to the Count application controller.

    Returns the parsed Wagner parsimony output, transposed with the new
    column named 'nodes'.
    '''
    #initialize Count app controller
    count = Count(HALT_EXEC=HALT_EXEC)

    #set the parameters
    if gain:
        count.Parameters['-gain'].on(gain)
    if max_paralogs:
        count.Parameters['-max_paralogs'].on(max_paralogs)

    ###Have to manipulate the trait table some. Need to transpose it and strip ids surrounded in quotes.
    table = LoadTable(filename=trait_table_path, header=True, sep='\t')

    #get the first column (containing row ids)
    genome_ids = table.getRawData(table.Header[0])
    #remove single quotes from the id if they exist
    genome_ids = [str(genome_id).strip('\'') for genome_id in genome_ids]
    #transpose the matrix
    table = table.transposed(new_column_name=table.Header[0])
    #Change the headers
    table = table.withNewHeader(table.Header[1:], genome_ids)

    #write the modified table to a tmp file and run Count on it; the tmp
    #file is removed even when writing or running fails
    tmp_table_path = get_tmp_filename()
    try:
        table.writeToFile(tmp_table_path, sep='\t')
        result = count(data=(tree_path, tmp_table_path))
    finally:
        try:
            remove(tmp_table_path)
        except OSError:
            # best-effort cleanup: the file may never have been created
            pass

    #parse the tree only to learn how many tips it has; close the handle
    with open(tree_path) as tree_f:
        tree = DndParser(tree_f)

    #parse the results into a Cogent Table
    asr_table = parse_wagner_parsimony_output(result["StdOut"].readlines(),
                                              remove_num_tips=len(tree.tips()))

    #transpose the table
    asr_table = asr_table.transposed(new_column_name='nodes')

    return asr_table
Example #45
0
    def test_cache_tipnames(self):
        """cache_tipnames should store TipNames on the root and its children"""
        tree = DndParser("((a,b)c,(d,e)f)g;")
        cache_tipnames(tree)

        self.assertEqual(tree.TipNames, ['a', 'b', 'd', 'e'])
        expected_per_child = (['a', 'b'], ['d', 'e'])
        for child_idx, expected_names in enumerate(expected_per_child):
            self.assertEqual(tree.Children[child_idx].TipNames, expected_names)
Example #46
0
def build_tree_from_distance_matrix(matrix, best_tree=False, params=None,
    working_dir='/tmp'):
    """Returns a tree from a distance matrix.

    matrix: a square Dict2D object (cogent.util.dict2d)

    best_tree: if True (default:False), uses a slower but more accurate
    algorithm to build the tree.

    params: dict of parameters to pass in to the Clearcut app controller.
    The dict is copied, so the caller's object is never modified.

    working_dir: directory used for the temporary output file and as the
    app controller's working directory.

    The result will be a cogent.core.tree.PhyloNode object whose tips carry
    the original names from the matrix.
    """
    # Private copy: the previous mutable default ({}) was written to, so
    # '--out' leaked between calls and into the caller's dict.
    params = dict(params) if params else {}
    params['--out'] = get_tmp_filename(working_dir)

    # Create instance of app controller, enable tree, disable alignment
    app = Clearcut(InputHandler='_input_as_multiline_string', params=params,
                   WorkingDir=working_dir, SuppressStdout=True,
                   SuppressStderr=True)
    #Turn off input as alignment
    app.Parameters['-a'].off()
    #Input is a distance matrix
    app.Parameters['-d'].on()

    if best_tree:
        app.Parameters['-N'].on()

    # Turn the dict2d object into the expected input format
    matrix_input, int_keys = _matrix_input_from_dict2d(matrix)

    # Collect result
    result = app(matrix_input)

    try:
        # Build tree
        tree = DndParser(result['Tree'].read(), constructor=PhyloNode)

        # reassign to original names
        for node in tree.tips():
            node.Name = int_keys[node.Name]
    finally:
        # Clean up even when parsing or renaming fails
        result.cleanUp()

    return tree
Example #47
0
 def test_gsingle(self):
     """DndParser should yield a one-child PhyloNode for minimal input"""
     tree = DndParser(single)
     self.assertEqual(len(tree), 1)
     only_child = tree[0]
     # the lone child keeps both its label and its branch length
     for attr_name, expected in (('Name', 'abc'), ('Length', 3)):
         self.assertEqual(getattr(only_child, attr_name), expected)
     self.assertEqual(str(tree), '(abc:3.0);')
Example #48
0
 def test_ascii(self):
     # smoke test: only checks that asciiArt runs without raising
     self.tree.asciiArt()
     # unlabeled internal node
     sample = DndParser("(B:0.2,(C:0.3,D:0.4):0.6)F;")
     option_pairs = ((True, False), (True, True), (False, False))
     for internal_flag, compact_flag in option_pairs:
         sample.asciiArt(show_internal=internal_flag, compact=compact_flag)
Example #49
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree from Alignment object aln.

    aln: a cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    moltype: cogent.core.moltype.MolType object

    best_tree: unsupported (accepted but ignored)

    params: dict of parameters to pass in to the Muscle app controller.

    The result will be a cogent.core.tree.PhyloNode object whose tips carry
    the original sequence names.
    """
    # Create instance of app controller, enable tree, disable alignment
    app = Muscle(InputHandler='_input_as_multiline_string', params=params,
                 WorkingDir='/tmp')

    app.Parameters['-cluster'].on()
    app.Parameters['-tree1'].on(get_tmp_filename(app.WorkingDir))
    app.Parameters['-seqtype'].on(moltype.label)

    seq_collection = SequenceCollection(aln, MolType=moltype)

    #Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = seq_collection.getIntMap()
    #Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map, MolType=moltype)

    # Collect result
    result = app(int_map.toFasta())

    try:
        # Build tree
        tree = DndParser(result['Tree1Out'].read(), constructor=PhyloNode)

        # swap the abbreviated IDs back to the full sequence names
        for tip in tree.tips():
            tip.Name = int_keys[tip.Name]
    finally:
        # Clean up even if the tree cannot be parsed or renamed
        result.cleanUp()

    return tree
Example #50
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree from Alignment object aln.

    aln: a cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    moltype: cogent.core.moltype.MolType object

    best_tree: unsupported (accepted but ignored)

    params: dict of parameters to pass in to the Muscle app controller.

    Muscle is run with -clusteronly, writing its first tree to a temporary
    file. The result will be a cogent.core.tree.PhyloNode object whose tips
    carry the original sequence names.
    """
    # Create instance of app controller, enable tree, disable alignment
    app = Muscle(InputHandler='_input_as_multiline_string', params=params,
                 WorkingDir='/tmp')

    app.Parameters['-clusteronly'].on()
    app.Parameters['-tree1'].on(get_tmp_filename(app.WorkingDir))
    app.Parameters['-seqtype'].on(moltype.label)

    seq_collection = SequenceCollection(aln, MolType=moltype)

    #Create mapping between abbreviated IDs and full IDs
    int_map, int_keys = seq_collection.getIntMap()
    #Create SequenceCollection from int_map.
    int_map = SequenceCollection(int_map, MolType=moltype)

    # Collect result
    result = app(int_map.toFasta())

    try:
        # Build tree
        tree = DndParser(result['Tree1Out'].read(), constructor=PhyloNode)

        # restore the full IDs on the tips
        for tip in tree.tips():
            tip.Name = int_keys[tip.Name]
    finally:
        # Temporary result files are removed on failure as well as success
        result.cleanUp()

    return tree
Example #51
0
 def test_decorate_ntips(self):
     """decorate_ntips should set the expected NumTips on each node"""
     newick = "(((a,b)c,(d,e,f)g)h,(i,j)k)l;"
     tree = DndParser(newick)
     tip_by_name = dict((tip.Name, tip) for tip in tree.tips())

     # every tip receives its own, separate consensus list
     consensus_fixture = (
         ('a', [1, 2, 3, 4, 5, 6, 7]),
         ('b', [None, None, None, 5, None, None, None]),
         ('d', [1, 2, 3, 4, 5, 6, 8]),
         ('e', [None, None, None, None, None, None, None]),
         ('f', [1, 2, 3, 4, 5, 6, 8]),
         ('i', [1, 2, 3, 4, 5, 6, 8]),
         ('j', [1, 2, 3, 4, 5, 6, 8]),
     )
     for tip_name, consensus in consensus_fixture:
         tip_by_name[tip_name].Consensus = consensus

     decorate_ntips(tree)

     self.assertEqual(tree.NumTips, 6)
     first_clade = tree.Children[0]
     self.assertEqual(first_clade.NumTips, 4)
     self.assertEqual(tree.Children[1].NumTips, 2)
     for grandchild_idx in (0, 1):
         self.assertEqual(first_clade.Children[grandchild_idx].NumTips, 2)
    def setUp(self):
        # Fixture trees: plain, with whitespace in the labels, with a
        # three-way split, and with unnamed internal nodes.
        self.SimpleTree = DndParser(
            "((A:0.02,B:0.01)E:0.05,(C:0.01,D:0.01)F:0.05)root;")

        self.SimpleTreeWithSpaces = DndParser(
            "((E coli:0.02,S typhimurium :0.01)Gamma proteobacteria:0.05,(C\t:0.01,D:0.01)F:0.05)root;")

        self.SimplePolytomyTree = DndParser(
            "((A:0.02,B:0.01,B_prime:0.03)E:0.05,(C:0.01,D:0.01)F:0.05)root;")

        self.SimpleUnlabelledTree = DndParser(
            "((A:0.02,B:0.01):0.05,(C:0.01,D:0.01):0.05)root;")

        # Pairs of ids: the GG id comes first, the IMG id second
        self.GreengenesToIMG = [
            ('469810', '645058788'),
            ('457471', '645058789'),
            ('266998', '641736109'),
        ]
Example #53
0
def cluster_seqs(seqs,
                 neighbor_join=False,
                 params=None,
                 add_seq_names=True,
                 WorkingDir=None,
                 SuppressStderr=None,
                 SuppressStdout=None,
                 max_chars=1000000,
                 max_hours=1.0,
                 constructor=PhyloNode,
                 clean_up=True
                 ):
    """Muscle cluster list of sequences.

    seqs: either file name or list of sequence objects or list of strings or
        single multiline string containing sequences.
    neighbor_join: currently ignored.
    params: dict of extra Muscle parameters; it is copied, so the caller's
        dict is never modified.
    add_seq_names, WorkingDir, SuppressStderr, SuppressStdout: forwarded to
        muscle_seqs.
    max_chars: when the summed length of the items in seqs exceeds this,
        faster alignment settings (-maxiters 2, -diags1, -sv) are used.
    max_hours: value passed as -maxhours.
    constructor: node class handed to DndParser.
    clean_up: if True, remove the Muscle result files before returning.

    Returns the tree parsed from Muscle's first tree output.
    Raises ValueError if len(seqs) is less than 2.
    """
    num_seqs = len(seqs)
    if num_seqs < 2:
        raise ValueError("Muscle requres 2 or more sequences to cluster.")

    # Private copy: the old mutable default ({}) was written to, so the
    # fast-align flags set for one large input persisted in later calls.
    params = dict(params) if params else {}

    num_chars = sum(map(len, seqs))
    if num_chars > max_chars:
        params["-maxiters"] = 2
        params["-diags1"] = True
        params["-sv"] = True
        print("lots of chars, using fast align", num_chars)

    params["-maxhours"] = max_hours
    params["-cluster"] = True
    params["-tree1"] = get_tmp_filename(WorkingDir)

    muscle_res = muscle_seqs(seqs,
                 params=params,
                 add_seq_names=add_seq_names,
                 WorkingDir=WorkingDir,
                 SuppressStderr=SuppressStderr,
                 SuppressStdout=SuppressStdout)

    try:
        tree = DndParser(muscle_res["Tree1Out"], constructor=constructor)
    finally:
        # honour clean_up even when the tree cannot be parsed
        if clean_up:
            muscle_res.cleanUp()
    return tree
def check_tree_exact_match(fasta_labels,
                           tree_fp):
    """Checks fasta labels to exact match to tree tips

    Returns a list of two items: the fasta labels not in tips, and the tips
    not in fasta labels. Each item is a list of names, or True when nothing
    is missing on that side.

    fasta_labels: list of fasta labels
    tree_fp: tree filepath
    """

    # Only the part of each label before the first underscore is compared
    raw_fasta_labels = set([label.split('_')[0] for label in fasta_labels])

    # Parse inside 'with' so the handle is closed (it used to leak).
    # NOTE(review): mode 'U' is kept for Python 2 behaviour; it was removed
    # in Python 3.11.
    with open(tree_fp, "U") as tree_f:
        tree = DndParser(tree_f)

    # Get a set of tree tip names
    tree_tips = set(tree.getTipNames())

    labels_not_in_tips = [curr_label for curr_label in raw_fasta_labels
                          if curr_label not in tree_tips]
    # Return True if all found in tree tips
    if not labels_not_in_tips:
        labels_not_in_tips = True

    tips_not_in_labels = [curr_tip for curr_tip in tree_tips
                          if curr_tip not in raw_fasta_labels]
    # Likewise True if every tip has a matching label
    if not tips_not_in_labels:
        tips_not_in_labels = True

    return [labels_not_in_tips, tips_not_in_labels]
Example #55
0
def main():
    """Write the subtree that contains only the requested tips.

    The tips to keep come from a sequence id file (tips_fp) or, failing
    that, from a fasta file (fasta_fp); one of the two is required. With the
    negate option the selection is inverted against the tree. The resulting
    subtree is written to output_tree_fp.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    input_tree_fp = opts.input_tree_fp
    tips_fp = opts.tips_fp
    fasta_fp = opts.fasta_fp
    output_tree_fp = opts.output_tree_fp

    # Input files are opened with 'with' so their handles are closed.
    # NOTE(review): assumes the lookup helpers read the file fully before
    # returning - confirm against their definitions.
    if tips_fp is not None:
        with open(tips_fp, 'U') as tips_f:
            tips_to_keep = get_seqs_to_keep_lookup_from_seq_id_file(tips_f)
    elif fasta_fp is not None:
        with open(fasta_fp, 'U') as fasta_f:
            tips_to_keep = get_seqs_to_keep_lookup_from_fasta_file(fasta_f)
    else:
        option_parser.error("Must provide either -t or -f.")

    with open(input_tree_fp, 'U') as tree_f:
        tree = DndParser(tree_f)

    if opts.negate:
        tips_to_keep = negate_tips_to_keep(tips_to_keep, tree)

    tree_out = tree.getSubTree(tips_to_keep)

    tree_out.writeToFile(output_tree_fp)
Example #56
0
    def setUp(self):
        """Build the RangeNode trees and name indexes used by the tests."""
        # About the branch-length fixture:
        #
        # * float conversion drops trailing zeros, so the round trip is
        #   exact only because the lengths are written without them;
        #
        # * trailing characters such as the semicolon are not reproduced
        #   in the output, hence they are left out of the Newick text;
        #
        # * whitespace is kept purely for layout; comparisons strip it
        #   from the original string first.
        newick_with_lengths = """
    (
    (
    xyz:0.28124,
    (
    def:0.24498,
    mno:0.03627)
    A:0.1771)
    B:0.0487,

    abc:0.05925,
    (
    ghi:0.06914,
    jkl:0.13776)
    C:0.09853)
    """
        self.sample_tree_string = newick_with_lengths
        self.t = DndParser(newick_with_lengths, RangeNode)
        self.i = self.t.indexByAttr('Name')

        # topology-only tree, indexed by node name as well
        eight_tip_newick = '((((a,b),c),(d,e)),((f,g),h))'
        self.sample_string_2 = eight_tip_newick
        self.t2 = DndParser(eight_tip_newick, RangeNode)
        self.i2 = self.t2.indexByAttr('Name')

        # smaller topology-only tree; no index is built for it
        five_tip_newick = '(((a,b),c),(d,e))'
        self.sample_string_3 = five_tip_newick
        self.t3 = DndParser(five_tip_newick, RangeNode)
Example #57
0
    def setUp(self):
        """Define a couple of standard trees

        Builds several UniFracTreeNode trees plus, for two of them (self.t
        and self.old_t), the derived fixtures: environment counts, the node
        index, the environment count array with its lookup tables, and the
        branch lengths.
        """
        # topology-only trees (no branch lengths in the Newick strings)
        self.t1 = DndParser("(((a,b),c),(d,e))", UniFracTreeNode)
        self.t2 = DndParser("(((a,b),(c,d)),(e,f))", UniFracTreeNode)
        self.t3 = DndParser("(((a,b,c),(d)),(e,f))", UniFracTreeNode)
        self.t4 = DndParser("((c)b,((f,g,h)e,i)d)", UniFracTreeNode)
        # the Newick string gives the root no label, so it is named here
        self.t4.Name = "a"
        self.t_str = "((a:1,b:2):4,(c:3,(d:1,e:1):2):3)"

        # main tree with branch lengths and its environment assignments
        self.t = DndParser(self.t_str, UniFracTreeNode)
        # presumably one "tip  environment  count" record per line - verify
        # against count_envs
        self.env_str = """
a   A   1
a   C   2
b   A   1
b   B   1
c   B   1
d   B   3
e   C   1"""
        self.env_counts = count_envs(self.env_str.splitlines())
        self.node_index, self.nodes = index_tree(self.t)
        self.count_array, self.unique_envs, self.env_to_index, self.node_to_index = index_envs(
            self.env_counts, self.node_index
        )
        self.branch_lengths = get_branch_lengths(self.node_index)

        # second tree/environment fixture, processed exactly like the first
        self.old_t_str = "((org1:0.11,org2:0.22,(org3:0.12,org4:0.23)g:0.33)b:0.2,(org5:0.44,org6:0.55)c:0.3,org7:0.4)"

        self.old_t = DndParser(self.old_t_str, UniFracTreeNode)
        self.old_env_str = """
org1    env1    1
org1    env2    1
org2    env2    1
org3    env2    1
org4    env3    1
org5    env1    1
org6    env1    1
org7    env3    1
"""
        self.old_env_counts = count_envs(self.old_env_str.splitlines())
        self.old_node_index, self.old_nodes = index_tree(self.old_t)
        self.old_count_array, self.old_unique_envs, self.old_env_to_index, self.old_node_to_index = index_envs(
            self.old_env_counts, self.old_node_index
        )
        self.old_branch_lengths = get_branch_lengths(self.old_node_index)