Example #1
0
def cluster_seqs(seqs,
                 neighbor_join=False,
                 params={},
                 add_seq_names=True,
                 WorkingDir=None,
                 SuppressStderr=None,
                 SuppressStdout=None,
                 max_chars=1000000,
                 max_hours=1.0,
                 constructor=PhyloNode,
                 clean_up=True
                 ):
    """Muscle cluster list of sequences.

    seqs: either file name or list of sequence objects or list of strings or
        single multiline string containing sequences.
    
    Addl docs coming soon
    """
    num_seqs = len(seqs)
    if num_seqs < 2:
        raise ValueError, "Muscle requres 2 or more sequences to cluster."


    num_chars = sum(map(len, seqs))
    if num_chars > max_chars:
        params["-maxiters"] = 2 
        params["-diags1"] = True
        params["-sv"] = True
        #params["-distance1"] = "kmer6_6" 
        #params["-distance1"] = "kmer20_3" 
        #params["-distance1"] = "kbit20_3" 
        print "lots of chars, using fast align", num_chars


    params["-maxhours"] = max_hours
    #params["-maxiters"] = 10 

    #cluster_type = "upgmb"
    #if neighbor_join:
    #    cluster_type = "neighborjoining"
   
    params["-cluster"] = True 
    params["-tree1"] = get_tmp_filename(WorkingDir)

    muscle_res = muscle_seqs(seqs,
                 params=params,
                 add_seq_names=add_seq_names,
                 WorkingDir=WorkingDir,
                 SuppressStderr=SuppressStderr,
                 SuppressStdout=SuppressStdout)
   
    tree = DndParser(muscle_res["Tree1Out"], constructor=constructor)

    if clean_up:
        muscle_res.cleanUp()
    return tree 
Example #2
0
def aln_tree_seqs(seqs,
                 input_handler=None,
                 tree_type='neighborjoining',
                 params={},
                 add_seq_names=True,
                 WorkingDir=None,
                 SuppressStderr=None,
                 SuppressStdout=None,
                 max_hours=5.0,
                 constructor=PhyloNode,
                 clean_up=True
                 ):
    """Muscle align sequences and report tree from iteration2. 

    Unlike cluster_seqs, returns tree2 which is the tree made during the
    second muscle iteration (it should be more accurate that the cluster from
    the first iteration which is made fast based on  k-mer words)

    seqs: either file name or list of sequence objects or list of strings or
        single multiline string containing sequences.
    tree_type: can be either neighborjoining (default) or upgmb for UPGMA
    clean_up: When true, will clean up output files 
    """

    params["-maxhours"] = max_hours
    if tree_type:
        params["-cluster2"] = tree_type
    params["-tree2"] = get_tmp_filename(WorkingDir)
    params["-out"] = get_tmp_filename(WorkingDir)

    muscle_res = muscle_seqs(seqs,
                 input_handler=input_handler,
                 params=params,
                 add_seq_names=add_seq_names,
                 WorkingDir=WorkingDir,
                 SuppressStderr=SuppressStderr,
                 SuppressStdout=SuppressStdout)
    tree = DndParser(muscle_res["Tree2Out"], constructor=constructor)
    aln = [line for line in muscle_res["MuscleOut"]]

    if clean_up:
        muscle_res.cleanUp()
    return tree, aln
Example #3
0
def get_aligned_muscle(seq1,seq2):
    """Returns aligned sequences and frac_same using MUSCLE.

        THis needs to be moved to the muscle app controller
    """
    outname = get_tmp_filename()
    res = muscle_seqs([seq1,seq2], add_seq_names=True, WorkingDir="/tmp", out_filename=outname)
    #raise ValueError,  res['StdErr'].read()
    #raise ValueError, res
    seq1_aligned,seq2_aligned =list(MinimalFastaParser(res['MuscleOut'].read()))
    res.cleanUp()
    del(res)
    seq1_aligned = seq1_aligned[1][1:]
    seq2_aligned = seq2_aligned[1][1:]
    frac_same = sum(array(seq1_aligned) == array(seq2_aligned))/min(len(seq1),
                                                len(seq2))
    
    return seq1_aligned,seq2_aligned,frac_same