コード例 #1
0
def fasta_to_tree(DIR,
                  fasta,
                  num_cores,
                  seqtype,
                  num_seq_cutoff=NUM_SEQ_CUTOFF):
    """
	given a fasta file
	align, trim alignment and build a tree
	choose appropriate tools depending on size of the fasta file
	"""
    if DIR[-1] != "/": DIR += "/"
    seqcount, maxlen = get_fasta_size(DIR + fasta)
    assert seqcount >= 4, "Less than four sequences in " + DIR + fasta
    print fasta, seqcount, "sequences"
    if seqcount >= NUM_SEQ_CUTOFF:  # large cluster
        print "running pasta"
        alignment = pasta(DIR, fasta, num_cores, seqtype)
        cleaned = pxclsq(DIR, alignment, 0.01, seqtype)
        if len(read_fasta_file(DIR + cleaned)) >= 4:
            tree = fasttree(DIR, cleaned, seqtype)
        else:
            print "Less than 4 taxa in", cleaned
    else:  # small cluster
        alignment = mafft(DIR, fasta, num_cores, seqtype)
        cleaned = pxclsq(DIR, alignment, 0.1, seqtype)
        if len(read_fasta_file(DIR + cleaned)) >= 4:
            tree = raxml(DIR, cleaned, num_cores, seqtype)
        else:
            print "Less than 4 taxa in", cleaned
コード例 #2
0
def fasta_to_bs_tree(DIR, fasta, num_cores, seqtype):
    """
	given a fasta file for the final homolog
	align, trim alignment and build a tree with bootstrap support
	"""
    if DIR[-1] != "/": DIR += "/"
    seqcount, maxlen = get_fasta_size(DIR + fasta)
    assert seqcount >= 4, "Less than four sequences in " + DIR + fasta
    print fasta, seqcount, "sequences"
    alignment = mafft(DIR, fasta, num_cores, seqtype)
    cleaned = phyutility(DIR, alignment, 0.2, seqtype)
    if len(read_fasta_file(DIR + cleaned)) >= 4:
        tree = raxml_bs(DIR, cleaned, num_cores, seqtype)
    else:
        print "Less than 4 taxa in", cleaned
コード例 #3
0
def fasta_to_tree(DIR,
                  fasta,
                  num_cores,
                  seqtype,
                  num_seq_cutoff=NUM_SEQ_CUTOFF):
    """
	given a fasta file
	align, trim alignment and build a tree
	choose appropriate tools depending on size of the fasta file
	"""
    if DIR[-1] != "/": DIR += "/"
    seqcount, maxlen = get_fasta_size(DIR + fasta)
    assert seqcount >= 3, "Less than three sequences in " + DIR + fasta
    print fasta, seqcount, "sequences"
    if seqcount >= NUM_SEQ_CUTOFF:  # large cluster
        alignment = pasta(DIR, fasta, num_cores, seqtype)
        #		cleaned = phyutility(DIR,alignment,0.01,seqtype)
        cleaned = trimal(
            DIR, alignment, 0.5,
            0.001)  # use trimal-added by Tao, now need to def trimal
        if len(read_fasta_file(DIR + cleaned)) >= 3:
            tree = fasttree(DIR, cleaned, seqtype)
        else:
            print "Less than 3 taxa in", cleaned
    else:  # small cluster
        alignment = mafft(DIR, fasta, num_cores, seqtype)
        #		cleaned = phyutility(DIR,alignment,0.1,seqtype) "phyutility can only trim gaps-added by tao"
        cleaned = trimal(
            DIR, alignment, 0.5,
            0.001)  # use trimal-added by Tao, now need to def trimal
        seqcount, maxlen = get_fasta_size(DIR + cleaned)
        print cleaned, seqcount, "sequences"
        if len(read_fasta_file(DIR + cleaned)) >= 3:
            tree = fasttree(DIR, cleaned, seqtype)
            #tree = raxml(DIR,cleaned,num_cores,seqtype)
        #if len(read_fasta_file(DIR+cleaned)) == 3: # added by Tao
        #tree = fasttree(DIR,cleaned,seqtype) # use added by Tao
        else:
            print "Less than 3 taxa in", cleaned