Beispiel #1
0
def load_picrust_tree(tree_fp, verbose=False):
    """Load a tree for picrust and pass it through fix_tree_labels.

    tree_fp: path of the tree file handed to DndParser.

    verbose: forwarded to set_label_conversion_fns.

    Returns the tree built with the PicrustNode constructor, as returned by
    fix_tree_labels.
    """
    #PicrustNode seems to run into very slow/memory intensive performance...
    # The handle is closed as soon as parsing is done instead of being left
    # open until garbage collection.
    # NOTE(review): assumes DndParser consumes the handle before returning.
    with open(tree_fp) as tree_file:
        tree = DndParser(tree_file, constructor=PicrustNode)
    label_conversion_fns = set_label_conversion_fns(verbose=verbose)

    return fix_tree_labels(tree, label_conversion_fns)
Beispiel #2
0
def bootstrap_tree_from_alignment(aln, seed=None, num_trees=None, params=None):
    """Build a tree with bootstrap labels from an alignment via Clustalw.

    aln: a cogent.core.alignment.Alignment object, or data that can be used
    to build one.

    seed: integer seed; only applied when '-seed' is not already switched on
    by params. When left as None, randint(0, 1000) supplies the value.

    num_trees: integer number of bootstrap trees; only applied when
    '-bootstrap' is not already switched on by params. Defaults to 1000.

    params: dict of parameters to pass in to the Clustal app controller.

    Returns the tree parsed from the controller's 'Tree' output with the
    PhyloNode constructor; tip names are mapped back to the original
    sequence identifiers.
    """
    # Controller with bootstrapping enabled and alignment/tree building off
    clustalw = Clustalw(InputHandler='_input_as_multiline_string',
                        params=params,
                        WorkingDir='/tmp')
    clustalw.Parameters['-align'].off()
    clustalw.Parameters['-tree'].off()

    if clustalw.Parameters['-bootstrap'].isOff():
        replicates = 1000 if num_trees is None else num_trees
        clustalw.Parameters['-bootstrap'].on(replicates)

    if clustalw.Parameters['-seed'].isOff():
        chosen_seed = randint(0, 1000) if seed is None else seed
        clustalw.Parameters['-seed'].on(chosen_seed)

    if clustalw.Parameters['-bootlabels'].isOff():
        clustalw.Parameters['-bootlabels'].on("node")

    # Clustalw clips identifiers, so run it on integer-keyed sequences and
    # translate the tip names back afterwards.
    original_seqs = SequenceCollection(aln)
    renamed_seqs, short_to_full = original_seqs.getIntMap()
    renamed_seqs = SequenceCollection(renamed_seqs)

    clustalw_result = clustalw(renamed_seqs.toFasta())

    tree = DndParser(clustalw_result['Tree'].read(), constructor=PhyloNode)
    for tip in tree.tips():
        tip.Name = short_to_full[tip.Name]

    # Remove the files produced by the run
    clustalw_result.cleanUp()

    return tree
Beispiel #3
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Returns a tree built by RAxML from Alignment object aln.

    aln: an Alignment object, or data that can be used to build one.

    moltype: cogent.core.moltype.MolType object; must compare equal to DNA,
    RNA or PROTEIN unless params already contains '-m'.

    best_tree: best_tree support is currently not implemented; passing a
    true value raises NotImplementedError.

    params: dict of parameters to pass in to the RAxML app controller. A
    dict supplied by the caller is updated in place with the model ('-m'),
    working directory ('-w'), run name ('-n'), '-k' and the random seeds
    ('-p', '-x').

    The result is the tree parsed from RAxML's 'Bootstrap' output with the
    PhyloNode constructor, tips renamed to the original sequence names.

    Raises ValueError if no model is given and moltype is not recognised.
    """
    if best_tree:
        raise NotImplementedError

    # Use a fresh dict per call. The previous ``params={}`` default was one
    # shared object, so the settings written below (including '-m') leaked
    # into every later call that relied on the default.
    if params is None:
        params = {}

    if '-m' not in params:
        if moltype == DNA or moltype == RNA:
            #params["-m"] = 'GTRMIX'
            # in version 7.2.3, GTRMIX is no longer supported but says GTRCAT
            # behaves like GTRMIX (http://www.phylo.org/tools/raxmlhpc2.html)
            params["-m"] = 'GTRGAMMA'
        elif moltype == PROTEIN:
            # NOTE(review): 'matrixName' looks like an unfilled placeholder
            # for a substitution matrix name -- confirm against RAxML docs.
            params["-m"] = 'PROTGAMMAmatrixName'
        else:
            raise ValueError("Moltype must be either DNA, RNA, or PROTEIN")

    if not hasattr(aln, 'toPhylip'):
        aln = Alignment(aln)
    seqs, align_map = aln.toPhylip()

    # generate temp filename for output
    params["-w"] = "/tmp/"
    params["-n"] = get_tmp_filename().split("/")[-1]
    params["-k"] = True
    params["-p"] = randint(1, 100000)
    params["-x"] = randint(1, 100000)

    ih = '_input_as_multiline_string'

    raxml_app = Raxml(params=params,
                      InputHandler=ih,
                      WorkingDir=None,
                      SuppressStderr=True,
                      SuppressStdout=True)

    raxml_result = raxml_app(seqs)

    tree = DndParser(raxml_result['Bootstrap'], constructor=PhyloNode)

    # toPhylip renamed the sequences; restore the original names on the tips
    for node in tree.tips():
        node.Name = align_map[node.Name]

    raxml_result.cleanUp()

    return tree
Beispiel #4
0
 def test_strip_and_rename_unwanted_labels_from_tree(self):
     """Unwanted text should be stripped from tip labels and tips renamed"""

     # build the query tree from its Newick text
     query_tree = DndParser(StringIO(RESULTING_QUERY_TREE),
                            constructor=PhyloNode)

     # strip/rename the tips, then compare the serialised tree
     renamed_tree = strip_and_rename_unwanted_labels_from_tree(
         self.align_map, query_tree)
     obs_newick = renamed_tree.getNewick(with_distances=True)
     self.assertEqual(obs_newick, STRIPPED_TREE)
Beispiel #5
0
def cluster_seqs(seqs,
                 neighbor_join=False,
                 params=None,
                 add_seq_names=True,
                 WorkingDir=None,
                 SuppressStderr=None,
                 SuppressStdout=None,
                 max_chars=1000000,
                 max_hours=1.0,
                 constructor=PhyloNode,
                 clean_up=True):
    """Muscle cluster list of sequences and return the resulting tree.

    seqs: either file name or list of sequence objects or list of strings or
        single multiline string containing sequences.

    neighbor_join: currently unused; the code choosing a cluster type is
        commented out.

    params: dict of Muscle parameters. A dict supplied by the caller is
        updated in place ('-maxhours', '-cluster', '-tree1' and, for large
        inputs, the fast-alignment switches).

    add_seq_names, WorkingDir, SuppressStderr, SuppressStdout: forwarded to
        muscle_seqs.

    max_chars: when the summed length of the items in seqs exceeds this,
        the faster settings ('-maxiters' 2, '-diags1', '-sv') are used.

    max_hours: value stored under '-maxhours'.

    constructor: node class handed to DndParser.

    clean_up: if True, cleanUp() is called on the Muscle result.

    Raises ValueError if len(seqs) is smaller than 2.
    """
    # Use a fresh dict per call. The previous ``params={}`` default was one
    # shared object, so e.g. the fast-alignment switches set for one large
    # input stayed on for every later call that used the default.
    if params is None:
        params = {}

    num_seqs = len(seqs)
    if num_seqs < 2:
        raise ValueError("Muscle requres 2 or more sequences to cluster.")

    num_chars = sum(map(len, seqs))
    if num_chars > max_chars:
        params["-maxiters"] = 2
        params["-diags1"] = True
        params["-sv"] = True
        #params["-distance1"] = "kmer6_6"
        #params["-distance1"] = "kmer20_3"
        #params["-distance1"] = "kbit20_3"
        # Single-argument form prints the same text on Python 2 and 3.
        print("lots of chars, using fast align %s" % num_chars)

    params["-maxhours"] = max_hours
    #params["-maxiters"] = 10

    #cluster_type = "upgmb"
    #if neighbor_join:
    #    cluster_type = "neighborjoining"

    params["-cluster"] = True
    params["-tree1"] = get_tmp_filename(WorkingDir)

    muscle_res = muscle_seqs(seqs,
                             params=params,
                             add_seq_names=add_seq_names,
                             WorkingDir=WorkingDir,
                             SuppressStderr=SuppressStderr,
                             SuppressStdout=SuppressStdout)

    tree = DndParser(muscle_res["Tree1Out"], constructor=constructor)

    if clean_up:
        muscle_res.cleanUp()
    return tree
Beispiel #6
0
 def test_missing_tip_name(self):
     """DndParser should produce the correct tree when missing a name"""
     # expected topology: two internal nodes holding (a, b) and (c, unnamed)
     exp = PhyloNode()
     exp.append(PhyloNode())
     exp.append(PhyloNode())
     first, second = exp.Children[0], exp.Children[1]
     first.append(PhyloNode(Name='a'))
     first.append(PhyloNode(Name='b'))
     second.append(PhyloNode(Name='c'))
     second.append(PhyloNode())

     obs = DndParser(missing_tip_name)
     self.assertEqual(str(obs), str(exp))
Beispiel #7
0
 def test_nonames(self):
     """DndParser should produce the correct tree when there are no names"""
     obs = DndParser(no_names)

     # expected topology: a root with two children, each holding two
     # unnamed tips
     exp = PhyloNode()
     for _ in range(2):
         exp.append(PhyloNode())
     for child_idx in (0, 1):
         for _ in range(2):
             exp.Children[child_idx].append(PhyloNode())

     self.assertEqual(str(obs), str(exp))
Beispiel #8
0
def assign_tax_labels_to_tree(tree, std):
    """Append a taxonomy string to every tip label of a tree.

        tree : newick string
        std : output from shorten_taxonomy_strings; looked up by the tip
              label with surrounding single quotes removed

    Returns the parsed tree whose tips are named '<label>_<taxonomy>'.
    """
    parsed_tree = DndParser(tree, PhyloNode)
    for tip in parsed_tree.tips():
        # labels may carry literal single quotes around the name
        bare_label = tip.Name.strip("'")
        taxonomy = std[bare_label]
        tip.Name = str(bare_label) + '_' + taxonomy
    return parsed_tree
Beispiel #9
0
def remove_taxonomy(tree, regex_string):
    """Remove text matching a regular expression from every tip label.

        tree : tree data handed to DndParser
        regex_string : pattern; each match inside a tip label is deleted

    Returns the parsed tree (PhyloNode constructor) with the edited tip
    names. Surrounding single quotes are stripped from each label first.
    """
    tree_nodes = DndParser(tree, PhyloNode)
    # Compile once; the pattern does not change between tips.
    pattern = re.compile(regex_string)
    for node in tree_nodes.tips():
        label = node.Name.strip('\'')  # in case there are actual quotes
        node.Name = pattern.sub('', label)
    return tree_nodes
    def test_unifrac_explicit(self):
        """unifrac should correctly compute correct values.
        
        environment M contains only tips not in tree, tip j is in no envs
        values were calculated by hand
        """
        t1 = DndParser('((a:1,b:2):4,((c:3, j:17),(d:1,e:1):2):3)', \
            UniFracTreeNode) # note c,j is len 0 node
        #           /-------- /-a
        # ---------|          \-b
        #          |          /-------- /-c
        #           \--------|          \-j
        #                     \-------- /-d
        #                               \-e

        env_str = """
        a   A   1
        a   C   2
        b   A   1
        b   B   1
        c   B   1
        d   B   3
        e   C   1
        m   M   88"""
        env_counts = count_envs(env_str.splitlines())
        # Float numerators keep the hand-calculated distances fractional even
        # when "/" performs integer division (Python 2 without
        # ``from __future__ import division``), where 10/16 would be 0.
        exp = (array(
            [[0, 10.0/16, 8.0/13],
            [10.0/16, 0, 8.0/17],
            [8.0/13, 8.0/17, 0]]), ['A','B','C'])
        self.assertFloatEqual(fast_unifrac(t1,env_counts)['distance_matrix'], \
            exp)
        # changing tree topology relative to c,j tips shouldn't change
        # anything
        t2 = DndParser('((a:1,b:2):4,((c:2, j:16):1,(d:1,e:1):2):3)', \
            UniFracTreeNode)
        self.assertFloatEqual(fast_unifrac(t2,env_counts)['distance_matrix'], \
            exp)
Beispiel #11
0
def insert_sequences_into_tree(seqs, moltype, params=None,
                                           write_log=True):
    """Insert sequences into Tree using RAxML.
    
    seqs: input handed to the RAxML app controller as a multiline string.
    
    moltype: cogent.core.moltype.MolType object (not used in this function).
    
    params: dict of parameters to pass in to the RAxML app controller. Must
    contain "-w" when write_log is True, because the log is written there.

    write_log: if True, RAxML's standard output is saved to a
    'log_raxml_*' file inside params["-w"].
    
    The result will be a tree parsed with the PhyloNode constructor.
    """
    # Avoid sharing one default dict object between calls.
    if params is None:
        params = {}

    ih = '_input_as_multiline_string'    

    raxml_app = Raxml(params=params,
                      InputHandler=ih,
                      WorkingDir=None,
                      SuppressStderr=False,
                      SuppressStdout=False,
                      HALT_EXEC=False)
    
    raxml_result = raxml_app(seqs)
    
    # write a log file; the context manager closes it even if the write fails
    if write_log:
        log_fp = join(params["-w"],'log_raxml_'+split(get_tmp_filename())[-1])
        with open(log_fp,'w') as log_file:
            log_file.write(raxml_result['StdOut'].read())
    
    # Disabled alternative, kept for reference:
    # getting setup since parsimony doesn't output tree..only jplace, however
    # it is currently corrupt
    #
    # use guppy to convert json file into a placement tree
    # guppy_params={'tog':None}
    #
    # new_tree=build_tree_from_json_using_params(raxml_result['json'].name, \
    #                                            output_dir=params["-w"], \
    #                                            params=guppy_params)
    
    # get tree from 'Result Names'
    new_tree=raxml_result['Result'].readlines()
    # Drop the "[I<number>]" markers before parsing.
    # NOTE(review): str() is applied to the list returned by readlines(), so
    # the list's repr (brackets/quotes) is what reaches DndParser -- confirm
    # this is intended.
    filtered_tree=re.sub(r'\[I\d+\]','',str(new_tree))
    tree = DndParser(filtered_tree, constructor=PhyloNode)

    raxml_result.cleanUp()

    return tree
Beispiel #12
0
    def setUp(self):
        """Create the alignments and trees shared by the tests in this file"""

        # ---- alignments ----
        self.aln1 = Alignment(['ABC', 'BCC', 'BAC'])

        # alignment from Henikoff 1994
        henikoff_seqs = {'seq1': 'GYVGS', 'seq2': 'GFDGF', 'seq3': 'GYDGF',
                         'seq4': 'GYQGG'}
        self.aln2 = Alignment(henikoff_seqs,
                              Names=['seq1', 'seq2', 'seq3', 'seq4'])

        # alignment from Vingron & Sibbald 1993
        three_seqs = {'seq1': 'AA', 'seq2': 'AA', 'seq3': 'BB'}
        self.aln3 = Alignment(three_seqs, Names=['seq1', 'seq2', 'seq3'])

        # alignment from Vingron & Sibbald 1993
        five_seqs = {'seq1': 'AA', 'seq2': 'AA', 'seq3': 'BB', 'seq4': 'BB',
                     'seq5': 'CC'}
        self.aln4 = Alignment(
            five_seqs, Names=['seq1', 'seq2', 'seq3', 'seq4', 'seq5'])

        self.aln5 = Alignment(['ABBA', 'ABCA', 'CBCB'])

        # alignment 5S rRNA seqs from Hein 1990
        self.aln6 = ClustalParser(FIVE_S_ALN.split('\n'))

        # alignment from Vingron & Sibbald 1993
        dna_seqs = {'seq1': 'AGCTA', 'seq2': 'AGGTA', 'seq3': 'ACCTG',
                    'seq4': 'TGCAA'}
        self.aln7 = Alignment(dna_seqs,
                              Names=['seq1', 'seq2', 'seq3', 'seq4'])

        # ---- trees (see bottom of file for description) ----
        # tree1 .. tree9 are parsed from TREE_1 .. TREE_9 in order
        newick_strings = (TREE_1, TREE_2, TREE_3, TREE_4, TREE_5, TREE_6,
                          TREE_7, TREE_8, TREE_9)
        for position, newick in enumerate(newick_strings):
            setattr(self, 'tree%d' % (position + 1), DndParser(newick))
Beispiel #13
0
    def test_write_updated_tree_file(self):
        """write_updated_tree_file should write a non-empty tree file"""

        # create temp filename
        new_tree_fp = splitext(get_tmp_filename())[0] + '.tre'
        self._paths_to_clean_up.append(new_tree_fp)

        # parse and load tree
        tree = DndParser(StringIO(STARTING_TREE), constructor=PhyloNode)

        # write out temp tree
        write_updated_tree_file(new_tree_fp, tree)

        # The previous check compared the file's text with the integer 0,
        # which says nothing about the content (always true on Python 2,
        # TypeError on Python 3). Check the length, and close the handle.
        with open(new_tree_fp) as tree_file:
            written = tree_file.read()
        self.assertTrue(len(written) > 0)
Beispiel #14
0
 def unifrac_pycogent(self):
     """Step 3 with Pycogent.

     Reads the Newick tree at self.fasttree_tree, runs fast_unifrac on it
     with self.tax.otu_table.to_dict(), and writes the resulting distance
     matrix as a tab-separated table to self.distances_csv.
     """
     # Close the tree file once it has been read.
     with open(self.fasttree_tree, 'r') as tree_file:
         tree_newick = tree_file.read()
     from cogent.parse.tree import DndParser
     from cogent.maths.unifrac.fast_tree import UniFracTreeNode
     tree = DndParser(tree_newick, UniFracTreeNode)
     from cogent.maths.unifrac.fast_unifrac import fast_unifrac
     distances = fast_unifrac(tree, self.tax.otu_table.to_dict())
     # Make a dataframe: 'distance_matrix' holds (values, names) #
     names = distances['distance_matrix'][1]
     df = pandas.DataFrame(distances['distance_matrix'][0],
                           index=names,
                           columns=names)
     df.to_csv(self.distances_csv, sep='\t', float_format='%.5g')
Beispiel #15
0
 def test_gnodedata(self):
     """DndParser should assign Name to internal nodes correctly"""
     t = DndParser(nodedata)

     # shape: root has two children; the first is a tip, the second has two
     self.assertEqual(len(t), 2)
     self.assertEqual(len(t[0]), 0)
     self.assertEqual(len(t[1]), 2)
     self.assertEqual(str(t), '(abc:3.0,(def:4.0,ghi:5.0)jkl:6.0);')

     # branch lengths keyed by node name
     lengths = {}
     for node in t.traverse():
         lengths[node.Name] = node.Length
     expected_lengths = (('abc', 3.0), ('def', 4.0), ('ghi', 5.0),
                         ('jkl', 6.0))
     for name, length in expected_lengths:
         self.assertEqual(lengths[name], length)
Beispiel #16
0
    def setUp(self):
        """Build the RangeNode trees and Name indexes used by the tests."""
        #Notes on sample string:
        #
        #1. trailing zeros are stripped in conversion to/from float, so result
        #   is only exactly the same without them.
        #
        #2. trailing chars (e.g. semicolon) are not recaptured in the output,
        #   so were deleted from original Newick-format string.
        #
        #3. whitespace is stripped, but is handy for formatting, so is stripped
        #   from original string before comparisons.
        self.sample_tree_string = """
    (
    (
    xyz:0.28124,
    (
    def:0.24498,
    mno:0.03627)
    A:0.1771)
    B:0.0487,

    abc:0.05925,
    (
    ghi:0.06914,
    jkl:0.13776)
    C:0.09853)
    """
        self.sample_string_2 = '((((a,b),c),(d,e)),((f,g),h))'
        self.sample_string_3 = '(((a,b),c),(d,e))'

        # parse each sample string into a RangeNode tree
        self.t = DndParser(self.sample_tree_string, RangeNode)
        self.t2 = DndParser(self.sample_string_2, RangeNode)
        self.t3 = DndParser(self.sample_string_3, RangeNode)

        # index the first two trees by node name
        self.i = self.t.indexByAttr('Name')
        self.i2 = self.t2.indexByAttr('Name')
Beispiel #17
0
def parse_newick(lines, constructor=PhyloNode):
    """Parse newick data into a tree, unescaping the node names

        Thin wrapper around cogent.parse.tree.DndParser that always passes
         unescape_name=True, so matched leading/trailing single quotes are
         stripped from tip names. A PhyloNode is built by default; pass
         constructor= to use another node class.

        Stripping the quotes matters in Qiime because tip names are
         frequently matched to OTU ids; a name read in with its quotes
         would not match the corresponding OTU identifier.

    """
    parser_options = {'constructor': constructor, 'unescape_name': True}
    return DndParser(lines, **parser_options)
 def test_PD_generic_whole_tree(self):
     """PD_generic_whole_tree should correctly compute PD for test tree."""
     self.t1 = DndParser('((a:1,b:2):4,(c:3,(d:1,e:1):2):3)', \
         UniFracTreeNode)
     self.env_str = """
     a   A   1
     a   C   2
     b   A   1
     b   B   1
     c   B   1
     d   B   3
     e   C   1"""
     # Pass the counts built from env_str above. Previously this local was
     # computed and then ignored in favour of self.env_counts, so the test
     # depended on a fixture defined elsewhere instead of the data shown
     # here.
     env_counts = count_envs(self.env_str.splitlines())
     self.assertEqual(PD_generic_whole_tree(self.t1, env_counts), \
         (['A','B','C'], array([7.,15.,11.])))
 def test_shuffle_tipnames(self):
     """shuffle_tipnames should return copy of tree w/ labels permuted"""
     # Stochastic in principle: the check is retried up to 5 times and the
     # test passes on the first attempt that succeeds.
     for _ in range(5):
         try:
             tree = DndParser(self.t_str)
             shuffled = shuffle_tipnames(tree)
             names_before = [tip.Name for tip in tree.tips()]
             names_after = [tip.Name for tip in shuffled.tips()]
             self.assertIsPermutation(names_before, names_after)
         except AssertionError:
             continue
         return
     else:
         # every attempt raised AssertionError
         raise AssertionError("Produced same permutation in 5 tries: broken?")
def main():
    """Print the UniFrac result selected by the command-line options.

    Reads the BIOM table at opts.input_path and the tree at opts.tree_path,
    then prints unifrac_mix(...) when opts.metrics is 'unweighted' and
    unifrac_mix_weighted(...) when it is 'weighted'.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    # Close both inputs once they have been parsed.
    # NOTE(review): assumes parse_biom_table and DndParser consume their
    # handle before returning.
    with open(opts.input_path, 'U') as biom_file:
        otu_table = parse_biom_table(biom_file)
    with open(opts.tree_path) as tree_file:
        tree = DndParser(tree_file, UniFracTreeNode)
    dic = otu_table._data
    #A = dict_to_csmat(dic)
    A = dic
    otus_id = otu_table.ObservationIds
    # Single-argument print(...) behaves the same on Python 2 and 3.
    if opts.metrics == 'unweighted':
        print(unifrac_mix(A, otus_id, tree))
    if opts.metrics == 'weighted':
        s = sum_dict(dic)
        print(unifrac_mix_weighted(A, otus_id, tree, s))
Beispiel #21
0
    def test_write_updated_tree_file(self):
        """write_updated_tree_file should write a non-empty tree file"""

        # create temp filename; only the path is needed, so the descriptor
        # returned by mkstemp is closed straight away
        fd, new_tree_fp = mkstemp(suffix='.tre')
        close(fd)
        self._paths_to_clean_up.append(new_tree_fp)

        # parse and load tree
        tree = DndParser(StringIO(STARTING_TREE), constructor=PhyloNode)

        # write out temp tree
        write_updated_tree_file(new_tree_fp, tree)

        # The previous check compared the file's text with the integer 0,
        # which says nothing about the content (always true on Python 2,
        # TypeError on Python 3). Check the length, and close the handle.
        with open(new_tree_fp) as tree_file:
            written = tree_file.read()
        self.assertTrue(len(written) > 0)
    def test_shared_branch_length_to_root(self):
        """Should return the correct shared branch length by env to root"""
        t_str = "(((a:1,b:2):3,c:4),(d:5,e:6,f:7):8);"
        envs = """
a A 1
b A 1
c A 1
d A 1
e A 1
f B 1 
"""
        env_counts = count_envs(envs.splitlines())
        t = DndParser(t_str, UniFracTreeNode)
        exp = {'A': 29.0, 'B': 15.0}
        obs = shared_branch_length_to_root(t, env_counts)
        self.assertEqual(obs, exp)
Beispiel #23
0
    def test_join_nodes(self):
        """join them nodes! (((99 + 97) + 94) + 91) + ..."""
        # one make_nodes call per cluster level, in this order
        level_args = (
            (self.clst_99, 0.01, 99),
            (self.clst_97, 0.02, 97),
            (self.clst_94, 0.03, 94),
        )
        parsed = [make_nodes(*args) for args in level_args]

        exp = """((((3:.005)99_2_3:.01,(8:.005,7:.005)99_3_8:.01)97_0_3:.015)94_0_3,
                 (((1:.005,6:.005)99_1_1:.01)97_1_1:.015,
                 ((10:.005,20:.005,30:.005)99_0_10:.01)97_2_10:.015)94_1_1);"""
        expected_tree = DndParser(exp)
        joined = join_nodes(parsed)

        # compare the serialised trees, branch lengths included
        obs_newick = joined.getNewick(with_distances=True)
        exp_newick = expected_tree.getNewick(with_distances=True)
        self.assertEqual(obs_newick, exp_newick)
def convert_tree_tips(align_map,tree_fp):
    """ rename the starting tree to correspond to the new phylip names, 
        which are assigned to each sequence

        align_map: mapping whose values are the current tip names in the
        tree and whose keys are the names the tips are renamed to

        tree_fp: path of the tree file to load

        Returns the tree (PhyloNode constructor) with renamed tips. A tip
        whose name is not a value of align_map raises KeyError.
    """
    
    # flip key value pairs
    tree_tip_to_seq_name = dict((align_map[key], key) for key in align_map)

    # change the tip labels to phylip labels; the tree file is closed once
    # parsed instead of being left open
    # NOTE(review): assumes DndParser consumes the handle before returning.
    with open(tree_fp) as open_tree:
        tree=DndParser(open_tree, constructor=PhyloNode)
    for node in tree.tips():
        node.Name = tree_tip_to_seq_name[node.Name]
    
    return tree
Beispiel #25
0
def wagner_for_picrust(tree_path,
                       trait_table_path,
                       gain=None,
                       max_paralogs=None,
                       HALT_EXEC=False):
    '''Runs count application controller given path of tree and trait table and returns a Table

    tree_path: path of the tree file; given to Count and also parsed here to
    count the tips.

    trait_table_path: path of the tab-separated trait table with a header.

    gain: if true, switches on Count's '-gain' parameter with this value.

    max_paralogs: if true, switches on '-max_paralogs' with this value.

    HALT_EXEC: forwarded to the Count app controller.

    Returns the parsed Count output, transposed with 'nodes' as the new
    column name.
    '''
    #initialize Count app controller
    count = Count(HALT_EXEC=HALT_EXEC)

    #set the parameters
    if gain:
        count.Parameters['-gain'].on(gain)
    if max_paralogs:
        count.Parameters['-max_paralogs'].on(max_paralogs)

    ###Have to manipulate the trait table some. Need to transpose it and strip ids surrounded in quotes.
    table = LoadTable(filename=trait_table_path, header=True, sep='\t')

    #get the first column (containing row ids)
    genome_ids = table.getRawData(table.Header[0])
    #remove single quotes from the id if they exist
    genome_ids = [str(genome_id).strip('\'') for genome_id in genome_ids]
    #transpose the matrix
    table = table.transposed(new_column_name=table.Header[0])
    #Change the headers
    table = table.withNewHeader(table.Header[1:], genome_ids)
    #write the modified table to a tmp file
    tmp_table_path = get_tmp_filename()
    table.writeToFile(tmp_table_path, sep='\t')

    #Run Count here; the tmp table is removed even when the run fails
    try:
        result = count(data=(tree_path, tmp_table_path))
    finally:
        remove(tmp_table_path)

    #tree=LoadTree(tree_path)
    # Close the tree file once parsed.
    # NOTE(review): assumes DndParser consumes the handle before returning.
    with open(tree_path) as tree_file:
        tree = DndParser(tree_file)

    #parse the results into a Cogent Table
    asr_table = parse_wagner_parsimony_output(result["StdOut"].readlines(),
                                              remove_num_tips=len(tree.tips()))

    #transpose the table
    asr_table = asr_table.transposed(new_column_name='nodes')

    return asr_table
def fast_p_test_file(tree_in,
                     envs_in,
                     num_iters=1000,
                     verbose=False,
                     test_on=TEST_ON_PAIRWISE):
    """ Wrapper to read tree and envs from files.

    tree_in: tree data handed to DndParser (UniFracTreeNode constructor).

    envs_in: environment data handed to count_envs.

    num_iters: number of iterations used for the simulated values.

    verbose: if True, prints the p-values of each pairwise comparison.

    test_on: TEST_ON_PAIRWISE tests every pair of unique environments;
    TEST_ON_TREE tests the whole tree.

    Returns a list of (first_env, second_env, raw_pval, cor_pval) tuples for
    the pairwise test, or [('Whole Tree', raw_pval)] for the whole-tree
    test. Raises ValueError for any other test_on value.
    """
    result = []
    t = DndParser(tree_in, UniFracTreeNode)
    envs = count_envs(envs_in)
    unique_envs, num_uenvs = get_unique_envs(envs)
    # calculate real, sim vals and p-vals for each pair of envs in tree
    if test_on == TEST_ON_PAIRWISE:
        cur_num_comps = num_comps(num_uenvs)
        for i in range(num_uenvs):
            first_env = unique_envs[i]
            for j in range(i + 1, num_uenvs):
                second_env = unique_envs[j]
                # identity permutation with one iteration gives the
                # observed value
                real = fast_p_test(t,
                                   envs,
                                   num_iters=1,
                                   first_env=first_env,
                                   second_env=second_env,
                                   permutation_f=identity)[0]
                sim = fast_p_test(t,
                                  envs,
                                  num_iters,
                                  first_env=first_env,
                                  second_env=second_env)
                raw_pval, cor_pval = mcarlo_sig(real,
                                                sim,
                                                cur_num_comps,
                                                tail='low')
                result.append((first_env, second_env, raw_pval, cor_pval))
                if verbose:
                    # Single-argument print(...) emits the same text on
                    # Python 2 and Python 3.
                    print("P Test: env %s vs %s" % (first_env, second_env))
                    print(' '.join(str(value) for value in
                                   (raw_pval, cor_pval, num_uenvs,
                                    cur_num_comps, 'low')))
    # calculate real, sim vals and p-vals for whole tree
    elif test_on == TEST_ON_TREE:
        real = fast_p_test(t, envs, num_iters=1, permutation_f=identity)[0]
        sim = fast_p_test(t, envs, num_iters)
        raw_pval, cor_pval = mcarlo_sig(real, sim, 1, tail='low')
        result.append(('Whole Tree', raw_pval))
    else:
        raise ValueError("Invalid test_on value: %s" % str(test_on))

    return result
Beispiel #27
0
def build_tree_from_distance_matrix(matrix, best_tree=False, params=None,
    working_dir='/tmp'):
    """Returns a tree from a distance matrix.

    matrix: a square Dict2D object (cogent.util.dict2d)
    
    best_tree: if True (default:False), switches on Clearcut's '-N'
    parameter (described as a slower but more accurate algorithm).

    params: dict of parameters to pass in to the Clearcut app controller. A
    dict supplied by the caller is updated in place with '--out'.

    working_dir: directory used for the temporary output file and as the
    controller's working directory.

    The result will be a cogent.core.tree.PhyloNode object whose tips carry
    the original names from the matrix.
    """
    # Use a fresh dict per call. The previous ``params={}`` default was one
    # shared object, so the '--out' entry written below persisted across
    # calls.
    if params is None:
        params = {}
    params['--out'] = get_tmp_filename(working_dir)

    # Create instance of app controller, enable tree, disable alignment
    app = Clearcut(InputHandler='_input_as_multiline_string', params=params,
                   WorkingDir=working_dir, SuppressStdout=True,
                   SuppressStderr=True)
    #Turn off input as alignment
    app.Parameters['-a'].off()
    #Input is a distance matrix
    app.Parameters['-d'].on()

    if best_tree:
        app.Parameters['-N'].on()

    # Turn the dict2d object into the expected input format
    matrix_input, int_keys = _matrix_input_from_dict2d(matrix)

    # Collect result
    result = app(matrix_input)

    # Build tree
    tree = DndParser(result['Tree'].read(), constructor=PhyloNode)

    # reassign to original names
    for node in tree.tips():
        node.Name = int_keys[node.Name]

    # Clean up
    result.cleanUp()

    return tree
Beispiel #28
0
def build_tree_from_json_using_params(fname, output_dir='/tmp/', params=None):
    """Returns a tree from a json.
    
    fname: filepath to input json 
    
    output_dir: location of output files; used as both WorkingDir and
    TmpDir of the Guppy app controller
    
    params: dict of parameters to pass in to the Guppy app controller.

    The result will be a Tree parsed with the PhyloNode constructor.

    Raises RuntimeError if no result tree could be read and the diagnostic
    re-run did not raise on its own.
    """
    # Avoid sharing one default dict object between calls.
    if params is None:
        params = {}

    ih = '_input_as_multiline_string'

    guppy_app = Guppy(params=params,
                      InputHandler=ih,
                      WorkingDir=output_dir,
                      TmpDir=output_dir,
                      SuppressStderr=True,
                      SuppressStdout=True,
                      HALT_EXEC=False)

    # Read the input once and close the file.
    with open(fname) as json_file:
        json_text = json_file.read()

    guppy_result = guppy_app(json_text)

    try:
        new_tree = guppy_result['result'].read()
    except Exception as err:
        # catch the error of not producing any results and print the command
        # run so user can check error
        # NOTE(review): presumably HALT_EXEC=True makes the controller raise
        # with the command line -- confirm against the app controller.
        guppy_cmd = Guppy(params=params,
                          InputHandler=ih,
                          WorkingDir=output_dir,
                          TmpDir=output_dir,
                          SuppressStderr=True,
                          SuppressStdout=True,
                          HALT_EXEC=True)
        guppy_cmd(json_text)
        # If the re-run returns normally there is still no tree to parse;
        # fail explicitly instead of hitting a NameError on new_tree below.
        raise RuntimeError("Guppy produced no result tree for %r: %s"
                           % (fname, err))

    tree = DndParser(new_tree, constructor=PhyloNode)

    guppy_result.cleanUp()

    return tree
Beispiel #29
0
def build_tree_from_alignment(aln, moltype, best_tree=False, params=None):
    """Build a tree from an alignment with Muscle's clustering mode.
    
    aln: a cogent.core.alignment.Alignment object, or data that can be used 
    to build one.
    
    moltype: cogent.core.moltype.MolType object; its label is passed to
    Muscle as '-seqtype'.

    best_tree: unsupported (ignored)
    
    params: dict of parameters to pass in to the Muscle app controller.
    
    Returns the tree parsed from Muscle's 'Tree1Out' output with the
    PhyloNode constructor; tip names are mapped back to the original
    sequence identifiers.
    """
    # Controller configured to cluster and write the first tree to a
    # temporary file in its working directory
    muscle = Muscle(InputHandler='_input_as_multiline_string',
                    params=params,
                    WorkingDir='/tmp')
    muscle.Parameters['-cluster'].on()
    tree_out_fp = get_tmp_filename(muscle.WorkingDir)
    muscle.Parameters['-tree1'].on(tree_out_fp)
    muscle.Parameters['-seqtype'].on(moltype.label)

    # Run Muscle on integer-keyed sequences; short_to_full maps the
    # abbreviated IDs back to the full IDs
    original_seqs = SequenceCollection(aln, MolType=moltype)
    renamed_seqs, short_to_full = original_seqs.getIntMap()
    renamed_seqs = SequenceCollection(renamed_seqs, MolType=moltype)

    muscle_result = muscle(renamed_seqs.toFasta())

    tree = DndParser(muscle_result['Tree1Out'].read(), constructor=PhyloNode)
    for tip in tree.tips():
        tip.Name = short_to_full[tip.Name]

    # Remove the files produced by the run
    muscle_result.cleanUp()

    return tree
Beispiel #30
0
    def test_build_tree_from_alignment_using_params(self):
        """Builds a tree from a json file"""

        # guppy parameters and the working directory for its output
        guppy_params = {"tog": None}
        outdir = '/tmp/'

        # build tree
        obs_tree = build_tree_from_json_using_params(self.json_fname,
                                                     output_dir=outdir,
                                                     params=guppy_params)

        # compare against the expected tree in serialised form
        obs_newick = obs_tree.getNewick()
        exp_newick = DndParser(TREE_RESULT, constructor=PhyloNode).getNewick()
        self.assertEqual(obs_newick, exp_newick)