def fasta_to_tree(DIR, fasta, num_cores, seqtype, num_seq_cutoff=NUM_SEQ_CUTOFF):
    """Align a fasta file, trim the alignment and build a tree.

    The tool chain is chosen from the number of sequences in the file:

    * at least ``num_seq_cutoff`` sequences (large cluster):
      pasta -> pxclsq (threshold argument 0.01) -> fasttree
    * fewer sequences (small cluster):
      mafft -> pxclsq (threshold argument 0.1) -> raxml

    Arguments:
    DIR            -- directory holding ``fasta``; must be a non-empty
                      string, a trailing "/" is appended when missing
    fasta          -- name of the fasta file inside DIR
    num_cores      -- passed through to pasta, mafft and raxml
    seqtype        -- passed through to the aligner, trimmer and tree builder
    num_seq_cutoff -- sequence count at or above which the large-cluster
                      tools are used (defaults to NUM_SEQ_CUTOFF)

    Nothing is returned.  An AssertionError is raised when the input file
    holds fewer than four sequences.  When read_fasta_file reports fewer
    than four entries in the trimmed alignment, a message is printed and no
    tree is built.
    """
    if DIR[-1] != "/":
        DIR += "/"
    seqcount, _ = get_fasta_size(DIR + fasta)
    assert seqcount >= 4, "Less than four sequences in " + DIR + fasta
    # Single-argument print() emits the same text under Python 2 and 3.
    print(" ".join([fasta, str(seqcount), "sequences"]))

    # Honour the caller-supplied cutoff; the previous code compared against
    # the module-level NUM_SEQ_CUTOFF and ignored this parameter entirely.
    if seqcount >= num_seq_cutoff:  # large cluster
        print("running pasta")
        alignment = pasta(DIR, fasta, num_cores, seqtype)
        cleaned = pxclsq(DIR, alignment, 0.01, seqtype)
        if len(read_fasta_file(DIR + cleaned)) >= 4:
            fasttree(DIR, cleaned, seqtype)
        else:
            print(" ".join(["Less than 4 taxa in", cleaned]))
    else:  # small cluster
        alignment = mafft(DIR, fasta, num_cores, seqtype)
        cleaned = pxclsq(DIR, alignment, 0.1, seqtype)
        if len(read_fasta_file(DIR + cleaned)) >= 4:
            raxml(DIR, cleaned, num_cores, seqtype)
        else:
            print(" ".join(["Less than 4 taxa in", cleaned]))
def fasta_to_bs_tree(DIR, fasta, num_cores, seqtype):
    """Build a tree with bootstrap support for a final homolog fasta file.

    Pipeline: mafft alignment -> phyutility trimming (threshold argument
    0.2) -> raxml_bs.

    Arguments:
    DIR       -- directory holding ``fasta``; must be a non-empty string,
                 a trailing "/" is appended when missing
    fasta     -- name of the fasta file inside DIR
    num_cores -- passed through to mafft and raxml_bs
    seqtype   -- passed through to mafft, phyutility and raxml_bs

    Nothing is returned.  An AssertionError is raised when the input file
    holds fewer than four sequences.  When read_fasta_file reports fewer
    than four entries in the trimmed alignment, a message is printed and no
    tree is built.
    """
    if DIR[-1] != "/":
        DIR = DIR + "/"
    fasta_path = DIR + fasta

    n_seqs, _ = get_fasta_size(fasta_path)
    assert n_seqs >= 4, "Less than four sequences in " + fasta_path
    # Single-argument print() emits the same text under Python 2 and 3.
    print(" ".join([fasta, str(n_seqs), "sequences"]))

    aligned = mafft(DIR, fasta, num_cores, seqtype)
    trimmed = phyutility(DIR, aligned, 0.2, seqtype)

    # Guard clause: bail out when trimming left too few entries for a tree.
    if len(read_fasta_file(DIR + trimmed)) < 4:
        print(" ".join(["Less than 4 taxa in", trimmed]))
        return
    raxml_bs(DIR, trimmed, num_cores, seqtype)
def fasta_to_tree(DIR, fasta, num_cores, seqtype, num_seq_cutoff=NUM_SEQ_CUTOFF):
    """Align a fasta file, trim the alignment with trimal and build a tree.

    The aligner is chosen from the number of sequences in the file:

    * at least ``num_seq_cutoff`` sequences (large cluster): pasta
    * fewer sequences (small cluster): mafft

    Both paths then call trimal(DIR, alignment, 0.5, 0.001) and build the
    tree with fasttree.  For small clusters the sequence count of the
    trimmed file is printed as well.

    Arguments:
    DIR            -- directory holding ``fasta``; must be a non-empty
                      string, a trailing "/" is appended when missing
    fasta          -- name of the fasta file inside DIR
    num_cores      -- passed through to pasta and mafft
    seqtype        -- passed through to the aligner and to fasttree
    num_seq_cutoff -- sequence count at or above which pasta is used
                      (defaults to NUM_SEQ_CUTOFF)

    Nothing is returned.  An AssertionError is raised when the input file
    holds fewer than three sequences.  When read_fasta_file reports fewer
    than three entries in the trimmed alignment, a message is printed and
    no tree is built.
    """
    if DIR[-1] != "/":
        DIR += "/"
    seqcount, _ = get_fasta_size(DIR + fasta)
    assert seqcount >= 3, "Less than three sequences in " + DIR + fasta
    # Single-argument print() emits the same text under Python 2 and 3.
    print(" ".join([fasta, str(seqcount), "sequences"]))

    # Honour the caller-supplied cutoff; the previous code compared against
    # the module-level NUM_SEQ_CUTOFF and ignored this parameter entirely.
    large_cluster = seqcount >= num_seq_cutoff
    if large_cluster:
        alignment = pasta(DIR, fasta, num_cores, seqtype)
    else:
        alignment = mafft(DIR, fasta, num_cores, seqtype)

    # trimal replaced the earlier phyutility trimming step (change by Tao;
    # the original note says phyutility can only trim gaps).
    # NOTE(review): the original comment says trimal still "needs to be
    # defined" -- confirm that a trimal() helper exists in this module.
    cleaned = trimal(DIR, alignment, 0.5, 0.001)

    if not large_cluster:
        # Small clusters additionally report how many sequences survived.
        seqcount, _ = get_fasta_size(DIR + cleaned)
        print(" ".join([cleaned, str(seqcount), "sequences"]))

    if len(read_fasta_file(DIR + cleaned)) >= 3:
        # fasttree is used for both cluster sizes; the raxml call for small
        # clusters was disabled in the original code.
        fasttree(DIR, cleaned, seqtype)
    else:
        print(" ".join(["Less than 3 taxa in", cleaned]))